WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 08 Jan 2009 06:57:56 -0800
Delivery-date: Thu, 08 Jan 2009 07:04:47 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1230090754 -32400
# Node ID 07f26e047fbfef8d3be9ceb0c878d294fb9d945b
# Parent  9837303a4708cf0bd558efb6676cef095f9c4406
# Parent  e2f36d066b7b66a538bbe240d46f49bede51d9ed
merge with xen-unstable.hg
---
 xen/arch/x86/cpu/mcheck/p4.c                                      |  270 
 xen/arch/x86/cpu/mcheck/p6.c                                      |  118 
 xen/arch/x86/rwlock.c                                             |   28 
 xen/include/asm-x86/rwlock.h                                      |   71 
 extras/mini-os/Makefile                                           |    8 
 extras/mini-os/arch/x86/mm.c                                      |   13 
 extras/mini-os/fs-front.c                                         |   14 
 extras/mini-os/include/xenbus.h                                   |    3 
 extras/mini-os/kernel.c                                           |   26 
 extras/mini-os/xenbus/xenbus.c                                    |   11 
 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in |    2 
 tools/firmware/hvmloader/hvmloader.c                              |   24 
 tools/firmware/rombios/rombios.c                                  | 4030 +++++-----
 tools/firmware/rombios/rombios.h                                  |   70 
 tools/libxc/xc_dom_core.c                                         |    1 
 tools/libxc/xc_dom_x86.c                                          |    6 
 tools/libxc/xc_domain.c                                           |   14 
 tools/libxc/xc_domain_restore.c                                   |   16 
 tools/libxc/xc_domain_save.c                                      |   26 
 tools/libxc/xc_pm.c                                               |  138 
 tools/libxc/xc_private.h                                          |    3 
 tools/libxc/xc_ptrace.c                                           |   41 
 tools/libxc/xenctrl.h                                             |   48 
 tools/libxc/xg_private.c                                          |    1 
 tools/misc/xen-detect.c                                           |   24 
 tools/misc/xenpm.c                                                |  744 +
 tools/python/xen/lowlevel/acm/acm.c                               |    2 
 tools/python/xen/lowlevel/flask/flask.c                           |    1 
 tools/python/xen/lowlevel/xc/xc.c                                 |   11 
 tools/python/xen/lowlevel/xs/xs.c                                 |   17 
 tools/python/xen/xend/XendCheckpoint.py                           |    2 
 tools/python/xen/xend/XendConfig.py                               |    8 
 tools/python/xen/xend/XendDomainInfo.py                           |    9 
 tools/python/xen/xend/balloon.py                                  |   36 
 tools/python/xen/xend/server/blkif.py                             |   13 
 tools/xcutils/xc_save.c                                           |   12 
 tools/xenpmd/xenpmd.c                                             |    6 
 tools/xenstat/xentop/xentop.c                                     |    2 
 unmodified_drivers/linux-2.6/Makefile                             |    1 
 unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h |    2 
 unmodified_drivers/linux-2.6/overrides.mk                         |    1 
 unmodified_drivers/linux-2.6/scsifront/Kbuild                     |    6 
 unmodified_drivers/linux-2.6/scsifront/Makefile                   |    3 
 xen/arch/ia64/xen/cpufreq/cpufreq.c                               |    1 
 xen/arch/x86/Makefile                                             |    1 
 xen/arch/x86/acpi/cpu_idle.c                                      |   49 
 xen/arch/x86/acpi/cpufreq/cpufreq.c                               |   73 
 xen/arch/x86/acpi/cpufreq/powernow.c                              |   11 
 xen/arch/x86/apic.c                                               |   33 
 xen/arch/x86/cpu/amd.c                                            |    4 
 xen/arch/x86/cpu/intel.c                                          |    4 
 xen/arch/x86/cpu/mcheck/Makefile                                  |    3 
 xen/arch/x86/cpu/mcheck/amd_k8.c                                  |    4 
 xen/arch/x86/cpu/mcheck/k7.c                                      |    5 
 xen/arch/x86/cpu/mcheck/mce.c                                     |   34 
 xen/arch/x86/cpu/mcheck/mce.h                                     |   17 
 xen/arch/x86/cpu/mcheck/mce_intel.c                               |  632 +
 xen/arch/x86/cpu/mcheck/non-fatal.c                               |   25 
 xen/arch/x86/cpu/mcheck/p5.c                                      |    1 
 xen/arch/x86/cpu/mcheck/x86_mca.h                                 |   19 
 xen/arch/x86/domctl.c                                             |   45 
 xen/arch/x86/hvm/hvm.c                                            |   26 
 xen/arch/x86/hvm/svm/intr.c                                       |    3 
 xen/arch/x86/hvm/vmx/entry.S                                      |   14 
 xen/arch/x86/hvm/vmx/intr.c                                       |   15 
 xen/arch/x86/hvm/vmx/realmode.c                                   |   45 
 xen/arch/x86/hvm/vmx/vmcs.c                                       |   61 
 xen/arch/x86/hvm/vmx/vmx.c                                        |  304 
 xen/arch/x86/i8259.c                                              |    1 
 xen/arch/x86/io_apic.c                                            |   14 
 xen/arch/x86/irq.c                                                |   67 
 xen/arch/x86/mm/shadow/common.c                                   |    9 
 xen/arch/x86/mm/shadow/multi.c                                    |    9 
 xen/arch/x86/msi.c                                                |  215 
 xen/arch/x86/oprofile/nmi_int.c                                   |    9 
 xen/arch/x86/physdev.c                                            |   24 
 xen/arch/x86/platform_hypercall.c                                 |   10 
 xen/arch/x86/setup.c                                              |    8 
 xen/arch/x86/smpboot.c                                            |   25 
 xen/arch/x86/time.c                                               |  126 
 xen/arch/x86/traps.c                                              |   20 
 xen/arch/x86/x86_32/asm-offsets.c                                 |    4 
 xen/arch/x86/x86_64/asm-offsets.c                                 |    6 
 xen/arch/x86/x86_64/compat/entry.S                                |    1 
 xen/arch/x86/x86_emulate/x86_emulate.h                            |    1 
 xen/common/domain.c                                               |   35 
 xen/common/schedule.c                                             |   46 
 xen/common/spinlock.c                                             |    6 
 xen/common/sysctl.c                                               |   25 
 xen/drivers/acpi/pmstat.c                                         |  306 
 xen/drivers/cpufreq/Makefile                                      |    1 
 xen/drivers/cpufreq/cpufreq.c                                     |   48 
 xen/drivers/cpufreq/cpufreq_misc_governors.c                      |  158 
 xen/drivers/cpufreq/cpufreq_ondemand.c                            |  143 
 xen/drivers/passthrough/amd/pci_amd_iommu.c                       |   20 
 xen/drivers/passthrough/io.c                                      |   35 
 xen/drivers/passthrough/iommu.c                                   |   59 
 xen/drivers/passthrough/pci.c                                     |   95 
 xen/drivers/passthrough/vtd/iommu.c                               |  292 
 xen/drivers/video/vesa.c                                          |   18 
 xen/drivers/video/vga.c                                           |   19 
 xen/include/acpi/cpufreq/cpufreq.h                                |   21 
 xen/include/asm-ia64/linux-xen/asm/spinlock.h                     |    2 
 xen/include/asm-x86/apicdef.h                                     |    2 
 xen/include/asm-x86/config.h                                      |    2 
 xen/include/asm-x86/cpufeature.h                                  |    1 
 xen/include/asm-x86/hvm/hvm.h                                     |    2 
 xen/include/asm-x86/hvm/trace.h                                   |    1 
 xen/include/asm-x86/hvm/vcpu.h                                    |    1 
 xen/include/asm-x86/hvm/vmx/vmcs.h                                |   16 
 xen/include/asm-x86/hvm/vmx/vmx.h                                 |    1 
 xen/include/asm-x86/irq.h                                         |    1 
 xen/include/asm-x86/mach-default/irq_vectors.h                    |    4 
 xen/include/asm-x86/msi.h                                         |    9 
 xen/include/asm-x86/msr-index.h                                   |    6 
 xen/include/asm-x86/perfc_defn.h                                  |    3 
 xen/include/asm-x86/processor.h                                   |    2 
 xen/include/asm-x86/spinlock.h                                    |   54 
 xen/include/asm-x86/system.h                                      |   22 
 xen/include/asm-x86/time.h                                        |    2 
 xen/include/asm-x86/x86_32/system.h                               |   10 
 xen/include/asm-x86/x86_64/system.h                               |   10 
 xen/include/public/arch-x86/xen-mca.h                             |   15 
 xen/include/public/domctl.h                                       |   12 
 xen/include/public/hvm/params.h                                   |    5 
 xen/include/public/physdev.h                                      |    9 
 xen/include/public/sysctl.h                                       |   86 
 xen/include/public/trace.h                                        |    1 
 xen/include/xen/iommu.h                                           |    2 
 xen/include/xen/lib.h                                             |    2 
 xen/include/xen/pci.h                                             |   12 
 xen/include/xen/sched.h                                           |    1 
 xen/include/xen/spinlock.h                                        |    6 
 xen/include/xen/time.h                                            |    1 
 134 files changed, 6259 insertions(+), 3231 deletions(-)

diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/Makefile   Wed Dec 24 12:52:34 2008 +0900
@@ -93,8 +93,12 @@ endif
 $(OBJ_DIR)/$(TARGET)_app.o: $(APP_OBJS) app.lds
        $(LD) -r -d $(LDFLAGS) -\( $^ -\) $(APP_LDLIBS) --undefined main -o $@
 
-$(OBJ_DIR)/$(TARGET): links $(OBJS) $(OBJ_DIR)/$(TARGET)_app.o arch_lib
-       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(OBJ_DIR)/$(TARGET)_app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
+ifneq ($(APP_OBJS),)
+APP_O=$(OBJ_DIR)/$(TARGET)_app.o 
+endif
+
+$(OBJ_DIR)/$(TARGET): links $(OBJS) $(APP_O) arch_lib
+       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(APP_O) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
        $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
        $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
        gzip -f -9 -c $@ >$@.gz
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/arch/x86/mm.c      Wed Dec 24 12:52:34 2008 +0900
@@ -420,7 +420,9 @@ static unsigned long demand_map_area_sta
 #define DEMAND_MAP_PAGES ((2ULL << 30) / PAGE_SIZE)
 #endif
 
-#ifdef HAVE_LIBC
+#ifndef HAVE_LIBC
+#define HEAP_PAGES 0
+#else
 unsigned long heap, brk, heap_mapped, heap_end;
 #ifdef __x86_64__
 #define HEAP_PAGES ((128ULL << 30) / PAGE_SIZE)
@@ -591,7 +593,7 @@ void arch_init_mm(unsigned long* start_p
 void arch_init_mm(unsigned long* start_pfn_p, unsigned long* max_pfn_p)
 {
 
-    unsigned long start_pfn, max_pfn;
+    unsigned long start_pfn, max_pfn, virt_pfns;
 
     printk("  _text:        %p\n", &_text);
     printk("  _etext:       %p\n", &_etext);
@@ -604,7 +606,12 @@ void arch_init_mm(unsigned long* start_p
     start_pfn = PFN_UP(to_phys(start_info.pt_base)) + 
                 start_info.nr_pt_frames + 3;
     max_pfn = start_info.nr_pages;
-   
+
+    /* We need room for demand mapping and heap, clip available memory */
+    virt_pfns = DEMAND_MAP_PAGES + HEAP_PAGES;
+    if (max_pfn + virt_pfns + 1 < max_pfn)
+        max_pfn = -(virt_pfns + 1);
+
     printk("  start_pfn:    %lx\n", start_pfn);
     printk("  max_pfn:      %lx\n", max_pfn);
 
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/fs-front.c
--- a/extras/mini-os/fs-front.c Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/fs-front.c Wed Dec 24 12:52:34 2008 +0900
@@ -867,18 +867,6 @@ moretodo:
     if(more) goto moretodo;
     
     in_irq = 0;
-}
-
-/* Small utility function to figure out our domain id */
-static domid_t get_self_id(void)
-{
-    char *dom_id;
-    domid_t ret; 
-
-    BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id));
-    sscanf(dom_id, "%d", &ret);
-
-    return ret;
 }
 
 static void alloc_request_table(struct fs_import *import)
@@ -1066,7 +1054,7 @@ static int init_fs_import(struct fs_impo
     unmask_evtchn(import->local_port);
 
     
-    self_id = get_self_id(); 
+    self_id = xenbus_get_self_id(); 
     /* Write the frontend info to a node in our Xenbus */
     sprintf(nodename, "/local/domain/%d/device/vfs/%d", 
                         self_id, import->import_id);
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/include/xenbus.h
--- a/extras/mini-os/include/xenbus.h   Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/include/xenbus.h   Wed Dec 24 12:52:34 2008 +0900
@@ -91,6 +91,9 @@ char* xenbus_printf(xenbus_transaction_t
                                   const char* fmt, ...)
                    __attribute__((__format__(printf, 4, 5)));
 
+/* Utility function to figure out our domain id */
+domid_t xenbus_get_self_id(void);
+
 /* Reset the XenBus system. */
 void fini_xenbus(void);
 
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/kernel.c   Wed Dec 24 12:52:34 2008 +0900
@@ -434,25 +434,25 @@ static void kbdfront_thread(void *p)
 
 static struct pcifront_dev *pci_dev;
 
+static void print_pcidev(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun)
+{
+    unsigned int vendor, device, rev, class;
+
+    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor);
+    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device);
+    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev);
+    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class);
+
+    printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev);
+}
+
 static void pcifront_thread(void *p)
 {
-    void print(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun)
-    {
-        unsigned int vendor, device, rev, class;
-
-        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor);
-        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device);
-        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev);
-        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class);
-
-        printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev);
-    }
-
     pci_dev = init_pcifront(NULL);
     if (!pci_dev)
         return;
     printk("PCI devices:\n");
-    pcifront_scan(pci_dev, print);
+    pcifront_scan(pci_dev, print_pcidev);
 }
 
 static void fs_thread(void *p)
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/xenbus/xenbus.c
--- a/extras/mini-os/xenbus/xenbus.c    Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/xenbus/xenbus.c    Wed Dec 24 12:52:34 2008 +0900
@@ -666,6 +666,17 @@ char* xenbus_printf(xenbus_transaction_t
     return xenbus_write(xbt,fullpath,val);
 }
 
+domid_t xenbus_get_self_id(void)
+{
+    char *dom_id;
+    domid_t ret;
+
+    BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id));
+    sscanf(dom_id, "%d", &ret);
+
+    return ret;
+}
+
 static void do_ls_test(const char *pre)
 {
     char **dirs, *msg;
diff -r 9837303a4708 -r 07f26e047fbf tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in Wed Dec 24 12:52:34 2008 +0900
@@ -83,7 +83,7 @@ READLINE_DEP = $$(READLINE_DIR)
 # -I. for config files.
 # -I${srcdir} for our headers.
 # -I$(srcdir)/../regformats for regdef.h.
-INCLUDE_CFLAGS = -I. -I${srcdir} -I$(srcdir)/../regformats -I$(INCLUDE_DIR)  -I../../../../../libxc/
+INCLUDE_CFLAGS = -I. -I${srcdir} -I$(srcdir)/../regformats -I$(INCLUDE_DIR)  -I../../../../../libxc/ -I../../../../../include/
 
 # M{H,T}_CFLAGS, if defined, has host- and target-dependent CFLAGS
 # from the config/ directory.
diff -r 9837303a4708 -r 07f26e047fbf tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/firmware/hvmloader/hvmloader.c      Wed Dec 24 12:52:34 2008 +0900
@@ -269,6 +269,11 @@ static void pci_setup(void)
             printf("pci dev %02x:%x INT%c->IRQ%u\n",
                    devfn>>3, devfn&7, 'A'+pin-1, isa_irq);
         }
+
+        /* Enable bus mastering. */
+        cmd = pci_readw(devfn, PCI_COMMAND);
+        cmd |= PCI_COMMAND_MASTER;
+        pci_writew(devfn, PCI_COMMAND, cmd);
     }
 
     /* Assign iomem and ioport resources in descending order of size. */
@@ -534,6 +539,23 @@ static uint16_t init_xen_platform_io_bas
     }
 
     return bios_info->xen_pfiob;
+}
+
+/* Set up an empty TSS area for virtual 8086 mode to use. 
+ * The only important thing is that it musn't have any bits set 
+ * in the interrupt redirection bitmap, so all zeros will do.  */
+static void init_vm86_tss(void)
+{
+    uint32_t tss;
+    struct xen_hvm_param p;
+
+    tss = e820_malloc(128, 128);
+    memset((char *)tss, 0, 128);
+    p.domid = DOMID_SELF;
+    p.index = HVM_PARAM_VM86_TSS;
+    p.value = tss;
+    hypercall_hvm_op(HVMOP_set_param, &p);
+    printf("vm86 TSS at %08x\n", tss);
 }
 
 int main(void)
@@ -605,6 +627,8 @@ int main(void)
         printf("Loading ACPI ...\n");
         acpi_build_tables();
     }
+
+    init_vm86_tss();
 
     cmos_write_memory_size();
 
diff -r 9837303a4708 -r 07f26e047fbf tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/firmware/rombios/rombios.c  Wed Dec 24 12:52:34 2008 +0900
@@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $
+// $Id: rombios.c,v 1.221 2008/12/07 17:32:29 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2002  MandrakeSoft S.A.
@@ -22,9 +22,9 @@
 //
 //  You should have received a copy of the GNU Lesser General Public
 //  License along with this library; if not, write to the Free Software
-//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
-
-// ROM BIOS for use with Bochs/Plex x86 emulation environment
+//  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+// ROM BIOS for use with Bochs/Plex86/QEMU emulation environment
 
 #define uint8_t unsigned char
 #define uint16_t unsigned short
@@ -81,10 +81,10 @@
 //
 // NOTES for El-Torito Boot (cbbochs@xxxxxxx)
 //   - CD-ROM booting is only available if ATA/ATAPI Driver is available
-//   - Current code is only able to boot mono-session cds 
+//   - Current code is only able to boot mono-session cds
 //   - Current code can not boot and emulate a hard-disk
 //     the bios will panic otherwise
-//   - Current code also use memory in EBDA segement. 
+//   - Current code also use memory in EBDA segement.
 //   - I used cmos byte 0x3D to store extended information on boot-device
 //   - Code has to be modified modified to handle multiple cdrom drives
 //   - Here are the cdrom boot failure codes:
@@ -102,13 +102,13 @@
 //      12 : can not read cd - boot image
 //
 //   ATA driver
-//   - EBDA segment. 
+//   - EBDA segment.
 //     I used memory starting at 0x121 in the segment
 //   - the translation policy is defined in cmos regs 0x39 & 0x3a
 //
 // TODO :
 //
-//   int74 
+//   int74
 //     - needs to be reworked.  Uses direct [bp] offsets. (?)
 //
 //   int13:
@@ -128,13 +128,13 @@
 //   - Implement remaining int13_cdemu functions (as defined by El-Torito specs)
 //   - cdrom drive is hardcoded to ide 0 device 1 in several places. see "FIXME ElTorito Hardcoded"
 //   - int13 Fix DL when emulating a cd. In that case DL is decremented before calling real int13.
-//     This is ok. But DL should be reincremented afterwards. 
+//     This is ok. But DL should be reincremented afterwards.
 //   - Fix all "FIXME ElTorito Various"
 //   - should be able to boot any cdrom instead of the first one
 //
 //   BCC Bug: find a generic way to handle the bug of #asm after an "if"  (fixed in 0.16.7)
 
-#define DEBUG_ROMBIOS      0
+#include "rombios.h"
 
 #define DEBUG_ATA          0
 #define DEBUG_INT13_HD     0
@@ -159,7 +159,7 @@
 #define BX_USE_ATADRV    1
 #define BX_ELTORITO_BOOT 1
 
-#define BX_TCGBIOS       0              /* main switch for TCG BIOS ext. */
+#define BX_TCGBIOS       0   /* main switch for TCG BIOS ext. */
 
 #define BX_MAX_ATA_INTERFACES   4
 #define BX_MAX_ATA_DEVICES      (BX_MAX_ATA_INTERFACES*2)
@@ -183,14 +183,18 @@
 #define EBDA_SIZE          1              // In KiB
 #define BASE_MEM_IN_K   (640 - EBDA_SIZE)
 
-  // Define the application NAME
-#ifdef HVMASSIST
-#  define BX_APPNAME "HVMAssist"
-#elif PLEX86
-#  define BX_APPNAME "Plex86"
-#else
-#  define BX_APPNAME "Bochs"
-#endif
+/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */
+#define IPL_TABLE_OFFSET     0x0300  /* offset from EBDA */
+#define IPL_TABLE_ENTRIES    8
+#define IPL_COUNT_OFFSET     0x0380  /* u16: number of valid table entries */
+#define IPL_SEQUENCE_OFFSET  0x0382  /* u16: next boot device */
+#define IPL_BOOTFIRST_OFFSET 0x0384  /* u16: user selected device */
+#define IPL_SIZE             0xff
+#define IPL_TYPE_FLOPPY      0x01
+#define IPL_TYPE_HARDDISK    0x02
+#define IPL_TYPE_CDROM       0x03
+#define IPL_TYPE_BEV         0x80
+
 
   // Sanity Checks
 #if BX_USE_ATADRV && BX_CPU<3
@@ -209,15 +213,10 @@
 #    error APM BIOS can only be used with 386+ cpu
 #endif
 
-#ifndef BX_SMP_PROCESSORS
-#define BX_SMP_PROCESSORS 1
-#    warning BX_SMP_PROCESSORS not defined, defaulting to 1
-#endif
-  
-#define PANIC_PORT  0x400
-#define PANIC_PORT2 0x401
-#define INFO_PORT   0x402
-#define DEBUG_PORT  0x403
+// define this if you want to make PCIBIOS working on a specific bridge only
+// undef enables PCIBIOS when at least one PCI device is found
+// i440FX is emulated by Bochs and QEMU
+#define PCI_FIXED_HOST_BRIDGE 0x12378086 ;; i440FX PCI bridge
 
 // #20  is dec 20
 // #$20 is hex 20 = 32
@@ -250,7 +249,7 @@ use16 286
 
 MACRO HALT
   ;; the HALT macro is called with the line number of the HALT call.
-  ;; The line number is then sent to the PANIC_PORT, causing Bochs/Plex 
+  ;; The line number is then sent to the PANIC_PORT, causing Bochs/Plex
   ;; to print a BX_PANIC message.  This will normally halt the simulation
   ;; with a message such as "BIOS panic at rombios.c, line 4091".
   ;; However, users can choose to make panics non-fatal and continue.
@@ -289,9 +288,9 @@ typedef unsigned long  Bit32u;
   void memsetb(seg,offset,value,count);
   void memcpyb(dseg,doffset,sseg,soffset,count);
   void memcpyd(dseg,doffset,sseg,soffset,count);
-  
+
   // memset of count bytes
-    void 
+    void
   memsetb(seg,offset,value,count)
     Bit16u seg;
     Bit16u offset;
@@ -301,14 +300,14 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push ax
       push cx
       push es
       push di
-  
+
       mov  cx, 10[bp] ; count
-      cmp  cx, #0x00
+      test cx, cx
       je   memsetb_end
       mov  ax, 4[bp] ; segment
       mov  es, ax
@@ -318,19 +317,19 @@ typedef unsigned long  Bit32u;
       cld
       rep
        stosb
-  
+
   memsetb_end:
       pop di
       pop es
       pop cx
       pop ax
-  
+
     pop bp
   ASM_END
   }
-  
+
   // memcpy of count bytes
-    void 
+    void
   memcpyb(dseg,doffset,sseg,soffset,count)
     Bit16u dseg;
     Bit16u doffset;
@@ -341,16 +340,16 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push ax
       push cx
       push es
       push di
       push ds
       push si
-  
+
       mov  cx, 12[bp] ; count
-      cmp  cx, #0x0000
+      test cx, cx
       je   memcpyb_end
       mov  ax, 4[bp] ; dsegment
       mov  es, ax
@@ -363,7 +362,7 @@ typedef unsigned long  Bit32u;
       cld
       rep
        movsb
-  
+
   memcpyb_end:
       pop si
       pop ds
@@ -371,14 +370,13 @@ typedef unsigned long  Bit32u;
       pop es
       pop cx
       pop ax
-  
+
     pop bp
   ASM_END
   }
 
-#if 0 
   // memcpy of count dword
-    void 
+    void
   memcpyd(dseg,doffset,sseg,soffset,count)
     Bit16u dseg;
     Bit16u doffset;
@@ -389,16 +387,16 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push ax
       push cx
       push es
       push di
       push ds
       push si
-  
+
       mov  cx, 12[bp] ; count
-      cmp  cx, #0x0000
+      test cx, cx
       je   memcpyd_end
       mov  ax, 4[bp] ; dsegment
       mov  es, ax
@@ -411,7 +409,7 @@ typedef unsigned long  Bit32u;
       cld
       rep
        movsd
-  
+
   memcpyd_end:
       pop si
       pop ds
@@ -419,16 +417,15 @@ typedef unsigned long  Bit32u;
       pop es
       pop cx
       pop ax
-  
+
     pop bp
   ASM_END
   }
-#endif
 
   // read_dword and write_dword functions
   static Bit32u         read_dword();
   static void           write_dword();
-  
+
     Bit32u
   read_dword(seg, offset)
     Bit16u seg;
@@ -437,25 +434,24 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push bx
       push ds
       mov  ax, 4[bp] ; segment
       mov  ds, ax
       mov  bx, 6[bp] ; offset
       mov  ax, [bx]
-      inc  bx
-      inc  bx
+      add  bx, #2
       mov  dx, [bx]
       ;; ax = return value (word)
       ;; dx = return value (word)
       pop  ds
       pop  bx
-  
+
     pop  bp
   ASM_END
   }
-  
+
     void
   write_dword(seg, offset, data)
     Bit16u seg;
@@ -465,7 +461,7 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push ax
       push bx
       push ds
@@ -474,50 +470,49 @@ typedef unsigned long  Bit32u;
       mov  bx, 6[bp] ; offset
       mov  ax, 8[bp] ; data word
       mov  [bx], ax  ; write data word
-      inc  bx
-      inc  bx
+      add  bx, #2
       mov  ax, 10[bp] ; data word
       mov  [bx], ax  ; write data word
       pop  ds
       pop  bx
       pop  ax
-  
+
     pop  bp
   ASM_END
   }
-  
+
   // Bit32u (unsigned long) and long helper functions
   ASM_START
-  
+
   ;; and function
   landl:
   landul:
-    SEG SS 
+    SEG SS
       and ax,[di]
-    SEG SS 
+    SEG SS
       and bx,2[di]
     ret
-  
+
   ;; add function
   laddl:
   laddul:
-    SEG SS 
+    SEG SS
       add ax,[di]
-    SEG SS 
+    SEG SS
       adc bx,2[di]
     ret
-  
+
   ;; cmp function
   lcmpl:
   lcmpul:
     and eax, #0x0000FFFF
     shl ebx, #16
-    add eax, ebx
+    or  eax, ebx
     shr ebx, #16
     SEG SS
       cmp eax, dword ptr [di]
     ret
-  
+
   ;; sub function
   lsubl:
   lsubul:
@@ -526,26 +521,26 @@ typedef unsigned long  Bit32u;
     SEG SS
     sbb bx,2[di]
     ret
-  
+
   ;; mul function
   lmull:
   lmulul:
     and eax, #0x0000FFFF
     shl ebx, #16
-    add eax, ebx
+    or  eax, ebx
     SEG SS
     mul eax, dword ptr [di]
     mov ebx, eax
     shr ebx, #16
     ret
-  
+
   ;; dec function
   ldecl:
   ldecul:
     SEG SS
     dec dword ptr [bx]
     ret
-  
+
   ;; or function
   lorl:
   lorul:
@@ -554,31 +549,31 @@ typedef unsigned long  Bit32u;
     SEG SS
     or  bx,2[di]
     ret
-  
+
   ;; inc function
   lincl:
   lincul:
     SEG SS
     inc dword ptr [bx]
     ret
-  
+
   ;; tst function
   ltstl:
   ltstul:
     and eax, #0x0000FFFF
     shl ebx, #16
-    add eax, ebx
+    or  eax, ebx
     shr ebx, #16
     test eax, eax
     ret
-  
+
   ;; sr function
   lsrul:
     mov  cx,di
     jcxz lsr_exit
     and  eax, #0x0000FFFF
     shl  ebx, #16
-    add  eax, ebx
+    or   eax, ebx
   lsr_loop:
     shr  eax, #1
     loop lsr_loop
@@ -586,7 +581,7 @@ typedef unsigned long  Bit32u;
     shr  ebx, #16
   lsr_exit:
     ret
-  
+
   ;; sl function
   lsll:
   lslul:
@@ -594,15 +589,15 @@ typedef unsigned long  Bit32u;
     jcxz lsl_exit
     and  eax, #0x0000FFFF
     shl  ebx, #16
-    add  eax, ebx
-  lsl_loop: 
+    or   eax, ebx
+  lsl_loop:
     shl  eax, #1
     loop lsl_loop
     mov  ebx, eax
     shr  ebx, #16
   lsl_exit:
     ret
-  
+
   idiv_:
     cwd
     idiv bx
@@ -616,7 +611,7 @@ typedef unsigned long  Bit32u;
   ldivul:
     and  eax, #0x0000FFFF
     shl  ebx, #16
-    add  eax, ebx
+    or   eax, ebx
     xor  edx, edx
     SEG SS
     mov  bx,  2[di]
@@ -665,7 +660,7 @@ typedef struct {
     Bit8u  revision;
     Bit8u  checksum;
     } dpte_t;
- 
+
   typedef struct {
     Bit8u  iface;        // ISA or PCI
     Bit16u iobase1;      // IO Base 1
@@ -678,15 +673,15 @@ typedef struct {
     Bit8u  device;       // Detected type of attached devices (hd/cd/none)
     Bit8u  removable;    // Removable device flag
     Bit8u  lock;         // Locks for removable devices
-    // Bit8u  lba_capable;  // LBA capable flag - always yes for bochs devices
-    Bit8u  mode;         // transfert mode : PIO 16/32 bits - IRQ - ISADMA - PCIDMA
+    Bit8u  mode;         // transfer mode : PIO 16/32 bits - IRQ - ISADMA - PCIDMA
     Bit16u blksize;      // block size
 
     Bit8u  translation;  // type of translation
     chs_t  lchs;         // Logical CHS
     chs_t  pchs;         // Physical CHS
 
-    Bit32u sectors;      // Total sectors count
+    Bit32u sectors_low;  // Total sectors count
+    Bit32u sectors_high;
     } ata_device_t;
 
   typedef struct {
@@ -697,10 +692,10 @@ typedef struct {
     ata_device_t  devices[BX_MAX_ATA_DEVICES];
     //
     // map between (bios hd id - 0x80) and ata channels
-    Bit8u  hdcount, hdidmap[BX_MAX_ATA_DEVICES];                
+    Bit8u  hdcount, hdidmap[BX_MAX_ATA_DEVICES];
 
     // map between (bios cd id - 0xE0) and ata channels
-    Bit8u  cdcount, cdidmap[BX_MAX_ATA_DEVICES];                
+    Bit8u  cdcount, cdidmap[BX_MAX_ATA_DEVICES];
 
     // Buffer for DPTE table
     dpte_t dpte;
@@ -710,9 +705,9 @@ typedef struct {
     Bit32u trsfbytes;
 
     } ata_t;
-  
+
 #if BX_ELTORITO_BOOT
-  // ElTorito Device Emulation data 
+  // ElTorito Device Emulation data
   typedef struct {
     Bit8u  active;
     Bit8u  media;
@@ -723,20 +718,20 @@ typedef struct {
     Bit16u buffer_segment;
     Bit16u load_segment;
     Bit16u sector_count;
-    
+
     // Virtual device
     chs_t  vdevice;
     } cdemu_t;
 #endif // BX_ELTORITO_BOOT
-  
+
 #include "32bitgateway.h"
 
   // for access to EBDA area
-  //     The EBDA structure should conform to 
-  //     http://www.cybertrails.com/~fys/rombios.htm document
+  //     The EBDA structure should conform to
+  //     http://www.frontiernet.net/~fys/rombios.htm document
   //     I made the ata and cdemu structs begin at 0x121 in the EBDA seg
-  // EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot 
-  // device tables are at 0x9ff00 -- 0x9ffff
+  // EBDA must be at most 768 bytes; it lives at EBDA_SEG, and the boot
+  // device tables are at EBDA_SEG:IPL_TABLE_OFFSET
   typedef struct {
     unsigned char ebda_size;
     unsigned char cmos_shutdown_status;
@@ -758,7 +753,7 @@ typedef struct {
 
     upcall_t upcall;
     } ebda_data_t;
-  
+
   #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1
   #define EbdaData ((ebda_data_t *) 0)
 
@@ -772,7 +767,7 @@ typedef struct {
     Bit32u lba1;
     Bit32u lba2;
     } int13ext_t;
- 
+
   #define Int13Ext ((int13ext_t *) 0)
 
   // Disk Physical Table definition
@@ -798,7 +793,7 @@ typedef struct {
     Bit8u   reserved3;
     Bit8u   checksum;
     } dpt_t;
- 
+
   #define Int13DPT ((dpt_t *) 0)
 
 #endif // BX_USE_ATADRV
@@ -828,9 +823,9 @@ typedef struct {
     } r16;
   struct {
     Bit32u filler[4];
-    Bit8u  bl, bh; 
+    Bit8u  bl, bh;
     Bit16u filler1;
-    Bit8u  dl, dh; 
+    Bit8u  dl, dh;
     Bit16u filler2;
     Bit8u  cl, ch;
     Bit16u filler3;
@@ -863,6 +858,14 @@ typedef struct {
   Bit16u cs;
   flags_t flags;
   } iret_addr_t;
+
+typedef struct {
+  Bit16u type;
+  Bit16u flags;
+  Bit32u vector;
+  Bit32u description;
+  Bit32u reserved;
+  } ipl_entry_t;
 
 
 
@@ -903,8 +906,6 @@ static void           int70_function();
 static void           int70_function();
 static void           int74_function();
 static Bit16u         get_CS();
-//static Bit16u         get_DS();
-//static void           set_DS();
 static Bit16u         get_SS();
 static unsigned int   enqueue_key();
 static unsigned int   dequeue_key();
@@ -923,7 +924,10 @@ static void           keyboard_panic();
 static void           keyboard_panic();
 static void           shutdown_status_panic();
 static void           nmi_handler_msg();
-
+static void           delay_ticks();
+static void           delay_ticks_and_check_for_keystroke();
+
+static void           interactive_bootkey();
 static void           print_bios_banner();
 static void           print_boot_device();
 static void           print_boot_failure();
@@ -957,33 +961,9 @@ Bit16u cdrom_boot();
 
 #endif // BX_ELTORITO_BOOT
 
-static char bios_cvs_version_string[] = "$Revision: 1.138 $";
-static char bios_date_string[] = "$Date: 2005/05/07 15:55:26 $";
-
-static char CVSID[] = "$Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $";
-
-/* Offset to skip the CVS $Id: prefix */ 
-#define bios_version_string  (CVSID + 4)
-
-#define BIOS_PRINTF_HALT     1
-#define BIOS_PRINTF_SCREEN   2
-#define BIOS_PRINTF_INFO     4
-#define BIOS_PRINTF_DEBUG    8
-#define BIOS_PRINTF_ALL      (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO)
-#define BIOS_PRINTF_DEBHALT  (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO | BIOS_PRINTF_HALT)
-
-#define printf(format, p...)  bios_printf(BIOS_PRINTF_SCREEN, format, ##p)
-
-// Defines the output macros. 
-// BX_DEBUG goes to INFO port until we can easily choose debug info on a 
-// per-device basis. Debug info are sent only in debug mode
-#if DEBUG_ROMBIOS
-#  define BX_DEBUG(format, p...)  bios_printf(BIOS_PRINTF_INFO, format, ##p)   
 
-#else
-#  define BX_DEBUG(format, p...) 
-#endif
-#define BX_INFO(format, p...)   bios_printf(BIOS_PRINTF_INFO, format, ##p)
-#define BX_PANIC(format, p...)  bios_printf(BIOS_PRINTF_DEBHALT, format, ##p)
+static char bios_cvs_version_string[] = "$Revision: 1.221 $ $Date: 2008/12/07 17:32:29 $";
+
+#define BIOS_COPYRIGHT_STRING "(c) 2002 MandrakeSoft S.A. Written by Kevin Lawton & the Bochs team."
 
 #if DEBUG_ATA
 #  define BX_DEBUG_ATA(a...) BX_DEBUG(a)
@@ -1156,9 +1136,9 @@ static struct {
       { 0x5100, 0x5133, 0x7600,   none, 0x20 }, /* 3 PgDn */
       { 0x5200, 0x5230,   none,   none, 0x20 }, /* 0 Ins */
       { 0x5300, 0x532e,   none,   none, 0x20 }, /* Del */
-      {   none,   none,   none,   none, none }, /* ??? */
-      {   none,   none,   none,   none, none }, /* ??? */
-      {   none,   none,   none,   none, none }, /* ??? */
+      {   none,   none,   none,   none, none },
+      {   none,   none,   none,   none, none },
+      { 0x565c, 0x567c,   none,   none, none }, /* \| */
       { 0x8500, 0x8700, 0x8900, 0x8b00, none }, /* F11 */
       { 0x8600, 0x8800, 0x8a00, 0x8c00, none }, /* F12 */
       };
@@ -1415,31 +1395,6 @@ ASM_END
 ASM_END
 }
 
-//  Bit16u
-//get_DS()
-//{
-//ASM_START
-//  mov  ax, ds
-//ASM_END
-//}
-//
-//  void
-//set_DS(ds_selector)
-//  Bit16u ds_selector;
-//{
-//ASM_START
-//  push bp
-//  mov  bp, sp
-//
-//    push ax
-//    mov  ax, 4[bp] ; ds_selector
-//    mov  ds, ax
-//    pop  ax
-//
-//  pop  bp
-//ASM_END
-//}
-
   Bit16u
 get_SS()
 {
@@ -1455,7 +1410,7 @@ copy_e820_table()
   Bit8u nr_entries = read_byte(0x9000, 0x1e8);
   Bit32u base_mem;
   if (nr_entries > 32)
-       nr_entries = 32;
+       nr_entries = 32;
   write_word(0xe000, 0x8, nr_entries);
   memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14);
   /* Report the proper base memory size at address 0x0413: otherwise
@@ -1563,7 +1518,7 @@ wrch(c)
   pop  bp
   ASM_END
 }
- 
+
   void
 send(action, c)
   Bit16u action;
@@ -1619,14 +1574,121 @@ put_uint(action, val, width, neg)
   send(action, val - (nval * 10) + '0');
 }
 
+  void
+put_luint(action, val, width, neg)
+  Bit16u action;
+  unsigned long val;
+  short width;
+  bx_bool neg;
+{
+  unsigned long nval = val / 10;
+  if (nval)
+    put_luint(action, nval, width - 1, neg);
+  else {
+    while (--width > 0) send(action, ' ');
+    if (neg) send(action, '-');
+  }
+  send(action, val - (nval * 10) + '0');
+}
+
+void put_str(action, segment, offset)
+  Bit16u action;
+  Bit16u segment;
+  Bit16u offset;
+{
+  Bit8u c;
+
+  while (c = read_byte(segment, offset)) {
+    send(action, c);
+    offset++;
+  }
+}
+
+  void
+delay_ticks(ticks)
+  Bit16u ticks;
+{
+  long ticks_to_wait, delta;
+  Bit32u prev_ticks, t;
+
+   /*
+    * The 0:046c wraps around at 'midnight' according to a 18.2Hz clock.
+    * We also have to be careful about interrupt storms.
+    */
+ASM_START
+  pushf
+  sti
+ASM_END
+  ticks_to_wait = ticks;
+  prev_ticks = read_dword(0x0, 0x46c);
+  do
+  {
+ASM_START
+    hlt
+ASM_END
+    t = read_dword(0x0, 0x46c);
+    if (t > prev_ticks)
+    {
+      delta = t - prev_ticks;     /* The temp var is required or bcc screws up. */
+      ticks_to_wait -= delta;
+    }
+    else if (t < prev_ticks)
+    {
+      ticks_to_wait -= t;         /* wrapped */
+    }
+
+    prev_ticks = t;
+  } while (ticks_to_wait > 0);
+ASM_START
+  cli
+  popf
+ASM_END
+}
+
+  Bit8u
+check_for_keystroke()
+{
+ASM_START
+  mov  ax, #0x100
+  int  #0x16
+  jz   no_key
+  mov  al, #1
+  jmp  done
+no_key:
+  xor  al, al
+done:
+ASM_END
+}
+
+  Bit8u
+get_keystroke()
+{
+ASM_START
+  mov  ax, #0x0
+  int  #0x16
+  xchg ah, al
+ASM_END
+}
+
+  void
+delay_ticks_and_check_for_keystroke(ticks, count)
+  Bit16u ticks, count;
+{
+  Bit16u i;
+  for (i = 1; i <= count; i++) {
+    delay_ticks(ticks);
+    if (check_for_keystroke())
+      break;
+  }
+}
+
 //--------------------------------------------------------------------------
 // bios_printf()
-//   A compact variable argument printf function which prints its output via
-//   an I/O port so that it can be logged by Bochs/Plex.  
-//   Currently, only %x is supported (or %02x, %04x, etc).
+//   A compact variable argument printf function.
 //
-//   Supports %[format_width][format]
-//   where format can be d,x,c,s
+//   Supports %[format_width][length]format
+//   where format can be x,X,u,d,s,S,c
+//   and the optional length modifier is l (ell)
 //--------------------------------------------------------------------------
   void
 bios_printf(action, s)
@@ -1637,7 +1699,7 @@ bios_printf(action, s)
   bx_bool  in_format;
   short i;
   Bit16u  *arg_ptr;
-  Bit16u   arg_seg, arg, nibble, shift_count, format_width;
+  Bit16u   arg_seg, arg, nibble, hibyte, shift_count, format_width, hexadd;
 
   arg_ptr = &s;
   arg_seg = get_SS();
@@ -1664,16 +1726,48 @@ bios_printf(action, s)
       else {
         arg_ptr++; // increment to next arg
         arg = read_word(arg_seg, arg_ptr);
-        if (c == 'x') {
+        if (c == 'x' || c == 'X') {
           if (format_width == 0)
             format_width = 4;
+          if (c == 'x')
+            hexadd = 'a';
+          else
+            hexadd = 'A';
           for (i=format_width-1; i>=0; i--) {
             nibble = (arg >> (4 * i)) & 0x000f;
-            send (action, (nibble<=9)? (nibble+'0') : (nibble-10+'A'));
+            send (action, (nibble<=9)? (nibble+'0') : (nibble-10+hexadd));
             }
           }
         else if (c == 'u') {
           put_uint(action, arg, format_width, 0);
+          }
+        else if (c == 'l') {
+          s++;
+          c = read_byte(get_CS(), s); /* is it ld,lx,lu? */
+          arg_ptr++; /* increment to next arg */
+          hibyte = read_word(arg_seg, arg_ptr);
+          if (c == 'd') {
+            if (hibyte & 0x8000)
+              put_luint(action, 0L-(((Bit32u) hibyte << 16) | arg), format_width-1, 1);
+            else
+              put_luint(action, ((Bit32u) hibyte << 16) | arg, format_width, 0);
+           }
+          else if (c == 'u') {
+            put_luint(action, ((Bit32u) hibyte << 16) | arg, format_width, 0);
+           }
+          else if (c == 'x' || c == 'X')
+           {
+            if (format_width == 0)
+              format_width = 8;
+            if (c == 'x')
+              hexadd = 'a';
+            else
+              hexadd = 'A';
+            for (i=format_width-1; i>=0; i--) {
+              nibble = ((((Bit32u) hibyte <<16) | arg) >> (4 * i)) & 0x000f;
+              send (action, (nibble<=9)? (nibble+'0') : (nibble-10+hexadd));
+              }
+           }
           }
         else if (c == 'd') {
           if (arg & 0x8000)
@@ -1682,7 +1776,13 @@ bios_printf(action, s)
             put_int(action, arg, format_width, 0);
           }
         else if (c == 's') {
-          bios_printf(action & (~BIOS_PRINTF_HALT), arg);
+          put_str(action, get_CS(), arg);
+          }
+        else if (c == 'S') {
+          hibyte = arg;
+          arg_ptr++;
+          arg = read_word(arg_seg, arg_ptr);
+          put_str(action, hibyte, arg);
           }
         else if (c == 'c') {
           send(action, arg);
@@ -1699,7 +1799,7 @@ bios_printf(action, s)
     }
 
   if (action & BIOS_PRINTF_HALT) {
-    // freeze in a busy loop.  
+    // freeze in a busy loop.
 ASM_START
     cli
  halt2_loop:
@@ -1733,8 +1833,8 @@ keyboard_init()
             max = 0x2000;
             }
         }
-  
-    // Due to timer issues, and if the IPS setting is > 15000000, 
+
+    // Due to timer issues, and if the IPS setting is > 15000000,
     // the incoming keys might not be flushed here. That will
     // cause a panic a few lines below.  See sourceforge bug report :
     // [ 642031 ] FATAL: Keyboard RESET error:993
@@ -1871,12 +1971,11 @@ keyboard_panic(status)
 keyboard_panic(status)
   Bit16u status;
 {
-  // If you're getting a 993 keyboard panic here, 
+  // If you're getting a 993 keyboard panic here,
   // please see the comment in keyboard_init
-  
+
   BX_PANIC("Keyboard error:%u\n",status);
 }
-
 
 #define CMOS_SHUTDOWN_S3 0xFE
 //--------------------------------------------------------------------------
@@ -1932,6 +2031,11 @@ shutdown_status_panic(status)
   BX_PANIC("Unimplemented shutdown status: %02x\n",(Bit8u)status);
 }
 
+void s3_resume_panic()
+{
+  BX_PANIC("Returned from s3_resume.\n");
+}
+
 //--------------------------------------------------------------------------
 // print_bios_banner
 //   displays a the bios version
@@ -1939,108 +2043,197 @@ void
 void
 print_bios_banner()
 {
-  printf(BX_APPNAME" BIOS, %d cpu%s, ", BX_SMP_PROCESSORS, BX_SMP_PROCESSORS>1?"s":"");
-  printf("%s %s\n", bios_cvs_version_string, bios_date_string);
+  printf(BX_APPNAME" BIOS - build: %s\n%s\nOptions: ",
+    BIOS_BUILD_DATE, bios_cvs_version_string);
+  printf(
+#if BX_APM
+  "apmbios "
+#endif
+#if BX_PCIBIOS
+  "pcibios "
+#endif
+#if BX_ELTORITO_BOOT
+  "eltorito "
+#endif
+#if BX_ROMBIOS32
+  "rombios32 "
+#endif
 #if BX_TCGBIOS
-  printf("TCG-enabled BIOS.\n");
+  "TCG-enabled"
 #endif
-  printf("\n");
+  "\n\n");
 }
-
 
 //--------------------------------------------------------------------------
 // BIOS Boot Specification 1.0.1 compatibility
 //
-// Very basic support for the BIOS Boot Specification, which allows expansion 
-// ROMs to register themselves as boot devices, instead of just stealing the 
+// Very basic support for the BIOS Boot Specification, which allows expansion
+// ROMs to register themselves as boot devices, instead of just stealing the
 // INT 19h boot vector.
-// 
+//
 // This is a hack: to do it properly requires a proper PnP BIOS and we aren't
-// one; we just lie to the option ROMs to make them behave correctly. 
-// We also don't support letting option ROMs register as bootable disk 
-// drives (BCVs), only as bootable devices (BEVs). 
+// one; we just lie to the option ROMs to make them behave correctly.
+// We also don't support letting option ROMs register as bootable disk
+// drives (BCVs), only as bootable devices (BEVs).
 //
 // http://www.phoenix.com/en/Customer+Services/White+Papers-Specs/pc+industry+specifications.htm
 //--------------------------------------------------------------------------
 
-/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */
-#define IPL_SEG              0x9ff0
-#define IPL_TABLE_OFFSET     0x0000
-#define IPL_TABLE_ENTRIES    8
-#define IPL_COUNT_OFFSET     0x0080  /* u16: number of valid table entries */
-#define IPL_SEQUENCE_OFFSET  0x0082  /* u16: next boot device */
-
-struct ipl_entry {
-  Bit16u type;
-  Bit16u flags;
-  Bit32u vector;
-  Bit32u description;
-  Bit32u reserved;
-};
-
-static void 
-init_boot_vectors() 
+static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"};
+
+static void
+init_boot_vectors()
 {
-  struct ipl_entry e; 
+  ipl_entry_t e;
   Bit16u count = 0;
   Bit16u ss = get_SS();
+  Bit16u ebda_seg = read_word(0x0040, 0x000E);
 
   /* Clear out the IPL table. */
-  memsetb(IPL_SEG, IPL_TABLE_OFFSET, 0, 0xff);
+  memsetb(ebda_seg, IPL_TABLE_OFFSET, 0, IPL_SIZE);
+
+  /* User selected device not set */
+  write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, 0xFFFF);
 
   /* Floppy drive */
-  e.type = 1; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
-  memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+  e.type = IPL_TYPE_FLOPPY; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+  memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
   count++;
 
   /* First HDD */
-  e.type = 2; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
-  memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+  e.type = IPL_TYPE_HARDDISK; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+  memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
   count++;
 
 #if BX_ELTORITO_BOOT
   /* CDROM */
-  e.type = 3; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
-  memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+  e.type = IPL_TYPE_CDROM; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+  memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
   count++;
-#endif  
+#endif
 
   /* Remember how many devices we have */
-  write_word(IPL_SEG, IPL_COUNT_OFFSET, count);
+  write_word(ebda_seg, IPL_COUNT_OFFSET, count);
   /* Not tried booting anything yet */
-  write_word(IPL_SEG, IPL_SEQUENCE_OFFSET, 0xffff);
+  write_word(ebda_seg, IPL_SEQUENCE_OFFSET, 0xffff);
 }
 
 static Bit8u
 get_boot_vector(i, e)
-Bit16u i; struct ipl_entry *e; 
+Bit16u i; ipl_entry_t *e;
 {
   Bit16u count;
   Bit16u ss = get_SS();
+  Bit16u ebda_seg = read_word(0x0040, 0x000E);
   /* Get the count of boot devices, and refuse to overrun the array */
-  count = read_word(IPL_SEG, IPL_COUNT_OFFSET);
+  count = read_word(ebda_seg, IPL_COUNT_OFFSET);
   if (i >= count) return 0;
   /* OK to read this device */
-  memcpyb(ss, e, IPL_SEG, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e));
+  memcpyb(ss, e, ebda_seg, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e));
   return 1;
 }
 
+#if BX_ELTORITO_BOOT
+  void
+interactive_bootkey()
+{
+  ipl_entry_t e;
+  Bit16u count;
+  char description[33];
+  Bit8u scan_code;
+  Bit8u i;
+  Bit16u ss = get_SS();
+  Bit16u valid_choice = 0;
+  Bit16u ebda_seg = read_word(0x0040, 0x000E);
+
+  while (check_for_keystroke())
+    get_keystroke();
+
+  printf("\nPress F12 for boot menu.\n\n");
+
+  delay_ticks_and_check_for_keystroke(11, 5); /* ~3 seconds */
+  if (check_for_keystroke())
+  {
+    scan_code = get_keystroke();
+    if (scan_code == 0x86) /* F12 */
+    {
+      while (check_for_keystroke())
+        get_keystroke();
+
+      printf("Select boot device:\n\n");
+
+      count = read_word(ebda_seg, IPL_COUNT_OFFSET);
+      for (i = 0; i < count; i++)
+      {
+        memcpyb(ss, &e, ebda_seg, IPL_TABLE_OFFSET + i * sizeof (e), sizeof (e));
+        printf("%d. ", i+1);
+        switch(e.type)
+        {
+          case IPL_TYPE_FLOPPY:
+          case IPL_TYPE_HARDDISK:
+          case IPL_TYPE_CDROM:
+            printf("%s\n", drivetypes[e.type]);
+            break;
+          case IPL_TYPE_BEV:
+            printf("%s", drivetypes[4]);
+            if (e.description != 0)
+            {
+              memcpyb(ss, &description, (Bit16u)(e.description >> 16), (Bit16u)(e.description & 0xffff), 32);
+              description[32] = 0;
+              printf(" [%S]", ss, description);
+           }
+           printf("\n");
+           break;
+        }
+      }
+
+      count++;
+      while (!valid_choice) {
+        scan_code = get_keystroke();
+        if (scan_code == 0x01 || scan_code == 0x58) /* ESC or F12 */
+        {
+          valid_choice = 1;
+        }
+        else if (scan_code <= count)
+        {
+          valid_choice = 1;
+          scan_code -= 1;
+          /* Set user selected device */
+          write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, scan_code);
+        }
+      }
+    printf("\n");
+    }
+  }
+}
+#endif // BX_ELTORITO_BOOT
 
 //--------------------------------------------------------------------------
 // print_boot_device
 //   displays the boot device
 //--------------------------------------------------------------------------
 
-static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"};
-
 void
-print_boot_device(type)
+print_boot_device(e)
+  ipl_entry_t *e;
+{
   Bit16u type;
-{
-  /* NIC appears as type 0x80 */ 
-  if (type == 0x80 ) type = 0x4;
-  if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n"); 
-  printf("Booting from %s...\n", drivetypes[type]);
+  char description[33];
+  Bit16u ss = get_SS();
+  type = e->type;
+  /* NIC appears as type 0x80 */
+  if (type == IPL_TYPE_BEV) type = 0x4;
+  if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n");
+  printf("Booting from %s", drivetypes[type]);
+  /* print product string if BEV */
+  if (type == 4 && e->description != 0) {
+    /* first 32 bytes are significant */
+    memcpyb(ss, &description, (Bit16u)(e->description >> 16), (Bit16u)(e->description & 0xffff), 32);
+    /* terminate string */
+    description[32] = 0;
+    printf(" [%S]", ss, description);
+  }
+  printf("...\n");
 }
 
 //--------------------------------------------------------------------------
@@ -2051,17 +2244,17 @@ print_boot_failure(type, reason)
 print_boot_failure(type, reason)
   Bit16u type; Bit8u reason;
 {
-  if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n"); 
+  if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n");
 
   printf("Boot from %s failed", drivetypes[type]);
   if (type < 4) {
     /* Report the reason too */
-  if (reason==0) 
-    printf(": not a bootable disk");
-  else
-    printf(": could not read the boot disk");
+    if (reason==0)
+      printf(": not a bootable disk");
+    else
+      printf(": could not read the boot disk");
   }
-  printf("\n");
+  printf("\n\n");
 }
 
 //--------------------------------------------------------------------------
@@ -2073,218 +2266,9 @@ print_cdromboot_failure( code )
   Bit16u code;
 {
   bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "CDROM boot failure code : %04x\n",code);
-  
+
   return;
 }
-
-#define WAIT_HZ 18
-/**
- * Check for keystroke.
- * @returns    True if keystroke available, False if not.
- */
-Bit8u check_for_keystroke()
-{
-ASM_START
-    mov  ax, #0x100
-    int  #0x16
-    jz   no_key
-    mov  al, #1
-    jmp  done
-no_key:
-    xor  al, al
-done:
-ASM_END
-}
-
-/**
- * Get keystroke.
- * @returns    BIOS scan code.
- */
-Bit8u get_keystroke()
-{
-ASM_START
-    mov  ax, #0x0
-    int  #0x16
-    xchg ah, al
-ASM_END
-}
-
-/**
- * Waits (sleeps) for the given number of ticks.
- * Checks for keystroke.
- *
- * @returns BIOS scan code if available, 0 if not.
- * @param   ticks       Number of ticks to sleep.
- * @param   stop_on_key Whether to stop immediately upon keypress.
- */
-Bit8u wait(ticks, stop_on_key)
-  Bit16u ticks;
-  Bit8u stop_on_key;
-{
-    long ticks_to_wait, delta;
-    Bit32u prev_ticks, t;
-    Bit8u scan_code = 0;
-
-    /*
-     * The 0:046c wraps around at 'midnight' according to a 18.2Hz clock.
-     * We also have to be careful about interrupt storms.
-     */
-    ticks_to_wait = ticks;
-    prev_ticks = read_dword(0x0, 0x46c);
-    do
-    {
-        t = read_dword(0x0, 0x46c);
-        if (t > prev_ticks)
-        {
-            delta = t - prev_ticks;     /* The temp var is required or bcc screws up. */
-            ticks_to_wait -= delta;
-        }
-        else if (t < prev_ticks)
-            ticks_to_wait -= t;         /* wrapped */
-        prev_ticks = t;
-
-        if (check_for_keystroke())
-        {
-            scan_code = get_keystroke();
-            bios_printf(BIOS_PRINTF_DEBUG, "Key pressed: %x\n", scan_code);
-            if (stop_on_key)
-                return scan_code;
-        }
-    } while (ticks_to_wait > 0);
-    return scan_code;
-}
-
-static void clearscreen() {
-    /* Hide cursor, clear screen and move cursor to starting position */
-ASM_START
-        push bx
-        push cx
-        push dx
-
-        mov  ax, #0x100
-        mov  cx, #0x1000
-        int  #0x10
-
-        mov  ax, #0x700
-        mov  bh, #7
-        xor  cx, cx
-        mov  dx, #0x184f
-        int  #0x10
-
-        mov  ax, #0x200
-        xor  bx, bx
-        xor  dx, dx
-        int  #0x10
-
-        pop  dx
-        pop  cx
-        pop  bx
-ASM_END
-}
-
-int bootmenu(selected)
-  int selected;
-{
-    Bit8u scode;
-    int max;
-
-    /* get the number of boot devices */
-    max = read_word(IPL_SEG, IPL_COUNT_OFFSET);
-
-    for(;;) {
-        if (selected > max || selected < 1) selected = 1;
-        clearscreen();
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\n\n\n\n\n\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "          Select boot device\n\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            1. Floppy\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            2. Hard drive\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            3. CD-ROM\n");
-        if (max == 4)
-            bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            4. Network\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\n          Currently selected: %d\n", selected);
-
-        do {
-            scode = wait(WAIT_HZ, 1);
-        } while (scode == 0);
-        switch(scode) {
-        case 0x02:
-        case 0x03:
-        case 0x04:
-            selected = scode - 1;
-            break;
-        case 0x05:
-            if (max == 4)
-                selected = scode -1 ;
-            else
-                scode = 0;
-            break;
-        case 0x48:
-            selected -= 1;
-            if (selected < 1)
-                selected = 1;
-            scode = 0;
-            break;
-        case 0x50:
-            selected += 1;
-            if (selected > max)
-                selected = max;
-            scode = 0;
-            break;
-        case 0x1c:
-            break;
-        default:
-            scode = 0;
-            break;
-        }
-        if (scode != 0)
-            break;
-    }
-
-    switch (selected) {
-    case 1:
-        return 0x3D;
-    case 2:
-        return 0x3E;
-    case 3:
-        return 0x3F;
-    case 4:
-        return 0x58;
-    default:
-        return 0;
-    }
-}
-
-void interactive_bootkey()
-{
-    Bit16u i;
-    Bit8u scan = 0;
-
-    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO,
-                "\n\nPress F10 to select boot device.\n");
-
-    scan = wait(1, 0);
-    if (scan == 0x44)
-        scan = bootmenu(inb_cmos(0x3d) & 0x0f);
-
-    /* set the default based on the keypress or menu */
-    switch(scan) {
-    case 0x3D:
-        outb_cmos(0x3d, 0x01);
-        break;
-    case 0x3E:
-        outb_cmos(0x3d, 0x02);
-        break;
-    case 0x3F:
-        outb_cmos(0x3d, 0x03);
-        break;
-    case 0x58:
-        outb_cmos(0x3d, 0x04);
-        break;
-    default:
-        break;
-    }
-}
-
 
 void
 nmi_handler_msg()
@@ -2304,7 +2288,7 @@ log_bios_start()
 #if BX_DEBUG_SERIAL
   outb(BX_DEBUG_PORT+UART_LCR, 0x03); /* setup for serial logging: 8N1 */
 #endif
-  BX_INFO("%s\n", bios_version_string);
+  BX_INFO("%s\n", bios_cvs_version_string);
 }
 
   bx_bool
@@ -2339,39 +2323,35 @@ debugger_off()
   outb(0xfedc, 0x00);
 }
 
-void 
+int
 s3_resume()
 {
     Bit32u s3_wakeup_vector;
-    Bit16u s3_wakeup_ip, s3_wakeup_cs;
-    Bit8u cmos_shutdown_status;
-
+    Bit8u s3_resume_flag;
+
+    s3_resume_flag = read_byte(0x40, 0xb0);
+#ifdef HVMASSIST
+    s3_wakeup_vector = get_s3_waking_vector();
+#else
+    s3_wakeup_vector = read_dword(0x40, 0xb2);
+#endif
+
+    BX_INFO("S3 resume called %x 0x%lx\n", s3_resume_flag, s3_wakeup_vector);
+    if (s3_resume_flag != CMOS_SHUTDOWN_S3 || !s3_wakeup_vector)
+           return 0;
+
+    write_byte(0x40, 0xb0, 0);
+
+    /* setup wakeup vector */
+    write_word(0x40, 0xb6, (s3_wakeup_vector & 0xF)); /* IP */
+    write_word(0x40, 0xb8, (s3_wakeup_vector >> 4)); /* CS */
+
+    BX_INFO("S3 resume jump to %x:%x\n", (s3_wakeup_vector >> 4),
+                   (s3_wakeup_vector & 0xF));
 ASM_START
-    push ds
-    push ax
-    mov ax, #EBDA_SEG
-    mov ds, ax
-    mov al, [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET]
-    mov .s3_resume.cmos_shutdown_status[bp], al
-    pop ax
-    pop ds
+    jmpf [0x04b6]
 ASM_END
-
-    if (cmos_shutdown_status != CMOS_SHUTDOWN_S3)
-        return;
-
-    s3_wakeup_vector = get_s3_waking_vector();
-    if (!s3_wakeup_vector)
-        return;
-
-    s3_wakeup_ip = s3_wakeup_vector & 0xF;
-    s3_wakeup_cs = s3_wakeup_vector >> 4;
-
-ASM_START
-    push .s3_resume.s3_wakeup_cs[bp]
-    push .s3_resume.s3_wakeup_ip[bp]
-    retf
-ASM_END
+    return 1;
 }
 
 #if BX_USE_ATADRV
@@ -2421,6 +2401,7 @@ ASM_END
 // bits 7-4 of the device/head (CB_DH) reg
 #define ATA_CB_DH_DEV0 0xa0    // select device 0
 #define ATA_CB_DH_DEV1 0xb0    // select device 1
+#define ATA_CB_DH_LBA 0x40    // use LBA
 
 // status reg (CB_STAT and CB_ASTAT) bits
 #define ATA_CB_STAT_BSY  0x80  // busy
@@ -2470,6 +2451,7 @@ ASM_END
 #define ATA_CMD_READ_SECTORS                 0x20
 #define ATA_CMD_READ_VERIFY_SECTORS          0x40
 #define ATA_CMD_RECALIBRATE                  0x10
+#define ATA_CMD_REQUEST_SENSE                0x03
 #define ATA_CMD_SEEK                         0x70
 #define ATA_CMD_SET_FEATURES                 0xEF
 #define ATA_CMD_SET_MULTIPLE_MODE            0xC6
@@ -2514,7 +2496,7 @@ ASM_END
 #define ATA_DATA_NO      0x00
 #define ATA_DATA_IN      0x01
 #define ATA_DATA_OUT     0x02
-  
+
 // ---------------------------------------------------------------------------
 // ATA/ATAPI driver : initialization
 // ---------------------------------------------------------------------------
@@ -2523,7 +2505,7 @@ void ata_init( )
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit8u  channel, device;
 
-  // Channels info init. 
+  // Channels info init.
   for (channel=0; channel<BX_MAX_ATA_INTERFACES; channel++) {
     write_byte(ebda_seg,&EbdaData->ata.channels[channel].iface,ATA_IFACE_NONE);
     write_word(ebda_seg,&EbdaData->ata.channels[channel].iobase1,0x0);
@@ -2531,7 +2513,7 @@ void ata_init( )
     write_byte(ebda_seg,&EbdaData->ata.channels[channel].irq,0);
     }
 
-  // Devices info init. 
+  // Devices info init.
   for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
     write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_NONE);
     write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_NONE);
@@ -2546,11 +2528,12 @@ void ata_init( )
     write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads,0);
     write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders,0);
     write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt,0);
-    
-    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors,0L);
+
+    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low,0L);
+    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high,0L);
     }
 
-  // hdidmap  and cdidmap init. 
+  // hdidmap  and cdidmap init.
   for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
     write_byte(ebda_seg,&EbdaData->ata.hdidmap[device],BX_MAX_ATA_DEVICES);
     write_byte(ebda_seg,&EbdaData->ata.cdidmap[device],BX_MAX_ATA_DEVICES);
@@ -2558,6 +2541,58 @@ void ata_init( )
 
   write_byte(ebda_seg,&EbdaData->ata.hdcount,0);
   write_byte(ebda_seg,&EbdaData->ata.cdcount,0);
+}
+
+#define TIMEOUT 0
+#define BSY 1
+#define NOT_BSY 2
+#define NOT_BSY_DRQ 3
+#define NOT_BSY_NOT_DRQ 4
+#define NOT_BSY_RDY 5
+
+#define IDE_TIMEOUT 32000u //32 seconds max for IDE ops
+
+int await_ide();
+static int await_ide(when_done,base,timeout)
+  Bit8u when_done;
+  Bit16u base;
+  Bit16u timeout;
+{
+  Bit32u time=0,last=0;
+  Bit16u status;
+  Bit8u result;
+  status = inb(base + ATA_CB_STAT); // for the times you're supposed to throw one away
+  for(;;) {
+    status = inb(base+ATA_CB_STAT);
+    time++;
+    if (when_done == BSY)
+      result = status & ATA_CB_STAT_BSY;
+    else if (when_done == NOT_BSY)
+      result = !(status & ATA_CB_STAT_BSY);
+    else if (when_done == NOT_BSY_DRQ)
+      result = !(status & ATA_CB_STAT_BSY) && (status & ATA_CB_STAT_DRQ);
+    else if (when_done == NOT_BSY_NOT_DRQ)
+      result = !(status & ATA_CB_STAT_BSY) && !(status & ATA_CB_STAT_DRQ);
+    else if (when_done == NOT_BSY_RDY)
+      result = !(status & ATA_CB_STAT_BSY) && (status & ATA_CB_STAT_RDY);
+    else if (when_done == TIMEOUT)
+      result = 0;
+
+    if (result) return 0;
+    if (time>>16 != last) // mod 2048 each 16 ms
+    {
+      last = time >>16;
+      BX_DEBUG_ATA("await_ide: (TIMEOUT,BSY,!BSY,!BSY_DRQ,!BSY_!DRQ,!BSY_RDY) %d time= %ld timeout= %d\n",when_done,time>>11, timeout);
+    }
+    if (status & ATA_CB_STAT_ERR)
+    {
+      BX_DEBUG_ATA("await_ide: ERROR (TIMEOUT,BSY,!BSY,!BSY_DRQ,!BSY_!DRQ,!BSY_RDY) %d time= %ld timeout= %d\n",when_done,time>>11, timeout);
+      return -1;
+    }
+    if ((timeout == 0) || ((time>>11) > timeout)) break;
+  }
+  BX_INFO("IDE time out\n");
+  return -1;
 }
 
 // ---------------------------------------------------------------------------
@@ -2600,7 +2635,7 @@ void ata_detect( )
 
   // Device detection
   hdcount=cdcount=0;
-  
+
   for(device=0; device<BX_MAX_ATA_DEVICES; device++) {
     Bit16u iobase1, iobase2;
     Bit8u  channel, slave, shift;
@@ -2630,33 +2665,34 @@ void ata_detect( )
 
     if ( (sc == 0x55) && (sn == 0xaa) ) {
       
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_UNKNOWN);
-    
+
       // reset the channel
-      ata_reset (device);
-      
+      ata_reset(device);
+
       // check for ATA or ATAPI
       outb(iobase1+ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
       sc = inb(iobase1+ATA_CB_SC);
       sn = inb(iobase1+ATA_CB_SN);
-      if ( (sc==0x01) && (sn==0x01) ) {
+      if ((sc==0x01) && (sn==0x01)) {
         cl = inb(iobase1+ATA_CB_CL);
         ch = inb(iobase1+ATA_CB_CH);
         st = inb(iobase1+ATA_CB_STAT);
 
-        if ( (cl==0x14) && (ch==0xeb) ) {
+        if ((cl==0x14) && (ch==0xeb)) {
           
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATAPI);
-          }
-        else if ( (cl==0x00) && (ch==0x00) && (st!=0x00) ) {
+        } else if ((cl==0x00) && (ch==0x00) && (st!=0x00)) {
           
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATA);
-          }
+        } else if ((cl==0xff) && (ch==0xff)) {
+          
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_NONE);
         }
       }
+    }
 
     type=read_byte(ebda_seg,&EbdaData->ata.devices[device].type);
-    
-    // Now we send a IDENTIFY command to ATA device 
+
+    // Now we send a IDENTIFY command to ATA device
     if(type == ATA_TYPE_ATA) {
-      Bit32u sectors;
+      Bit32u sectors_low, sectors_high;
       Bit16u cylinders, heads, spt, blksize;
       Bit8u  translation, removable, mode;
 
@@ -2667,21 +2703,26 @@ void ata_detect( )
       write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
       write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
 
-      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE, 1, 0, 0, 0, 0L, get_SS(),buffer) !=0 )
+      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE, 1, 0, 0, 0, 0L, 0L, get_SS(),buffer) !=0 )
         BX_PANIC("ata-detect: Failed to detect ATA device\n");
 
       removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
-#ifndef        NO_PIO32
+#ifndef        NO_PIO32
       mode      = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : 
ATA_MODE_PIO16;
 #endif
-
       blksize   = read_word(get_SS(),buffer+10);
-      
+
       cylinders = read_word(get_SS(),buffer+(1*2)); // word 1
       heads     = read_word(get_SS(),buffer+(3*2)); // word 3
       spt       = read_word(get_SS(),buffer+(6*2)); // word 6
 
-      sectors   = read_dword(get_SS(),buffer+(60*2)); // word 60 and word 61
+      if (read_word(get_SS(),buffer+(83*2)) & (1 << 10)) { // word 83 - lba48 support
+        sectors_low  = read_dword(get_SS(),buffer+(100*2)); // word 100 and word 101
+        sectors_high = read_dword(get_SS(),buffer+(102*2)); // word 102 and word 103
+      } else {
+        sectors_low = read_dword(get_SS(),buffer+(60*2)); // word 60 and word 61
+        sectors_high = 0;
+      }
 
       write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
       write_byte(ebda_seg,&EbdaData->ata.devices[device].removable, removable);
@@ -2690,7 +2731,8 @@ void ata_detect( )
       write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads, heads);
       write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders, 
cylinders);
       write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt, spt);
-      write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors, sectors);
+      write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low, sectors_low);
+      write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high, sectors_high);
       BX_INFO("ata%d-%d: PCHS=%u/%d/%d translation=", channel, 
slave,cylinders, heads, spt);
 
       translation = inb_cmos(0x39 + channel/2);
@@ -2718,14 +2760,14 @@ void ata_detect( )
           break;
         case ATA_TRANSLATION_LBA:
           spt = 63;
-          sectors /= 63;
-          heads = sectors / 1024;
+          sectors_low /= 63;
+          heads = sectors_low / 1024;
           if (heads>128) heads = 255;
           else if (heads>64) heads = 128;
           else if (heads>32) heads = 64;
           else if (heads>16) heads = 32;
           else heads=16;
-          cylinders = sectors / heads;
+          cylinders = sectors_low / heads;
           break;
         case ATA_TRANSLATION_RECHS:
           // Take care not to overflow
@@ -2752,15 +2794,15 @@ void ata_detect( )
       write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.heads, heads);
       write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.cylinders, 
cylinders);
       write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.spt, spt);
- 
-      // fill hdidmap 
+
+      // fill hdidmap
       write_byte(ebda_seg,&EbdaData->ata.hdidmap[hdcount], device);
       hdcount++;
       }
-    
+
     // Now we send a IDENTIFY command to ATAPI device
     if(type == ATA_TYPE_ATAPI) {
- 
+
       Bit8u  type, removable, mode;
       Bit16u blksize;
 
@@ -2771,12 +2813,12 @@ void ata_detect( )
       
write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_CDROM);
       write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
 
-      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE_PACKET, 1, 0, 0, 0, 0L, get_SS(),buffer) != 0)
+      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE_PACKET, 1, 0, 0, 0, 0L, 0L, get_SS(),buffer) != 0)
         BX_PANIC("ata-detect: Failed to detect ATAPI device\n");
 
       type      = read_byte(get_SS(),buffer+1) & 0x1f;
       removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
-#ifndef        NO_PIO32
+#ifndef        NO_PIO32
       mode      = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : 
ATA_MODE_PIO16;
 #endif
       blksize   = 2048;
@@ -2786,24 +2828,24 @@ void ata_detect( )
       write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, mode);
       write_word(ebda_seg,&EbdaData->ata.devices[device].blksize, blksize);
 
-      // fill cdidmap 
+      // fill cdidmap
       write_byte(ebda_seg,&EbdaData->ata.cdidmap[cdcount], device);
       cdcount++;
       }
-  
+
       {
       Bit32u sizeinmb;
       Bit16u ataversion;
       Bit8u  c, i, version, model[41];
-      
+
       switch (type) {
         case ATA_TYPE_ATA:
-          sizeinmb = 
read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors);
-          sizeinmb >>= 11;
+          sizeinmb = (read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high) << 21)
+            | (read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low) >> 11);
         case ATA_TYPE_ATAPI:
           // Read ATA/ATAPI version
           
ataversion=((Bit16u)(read_byte(get_SS(),buffer+161))<<8)|read_byte(get_SS(),buffer+160);
-          for(version=15;version>0;version--) { 
+          for(version=15;version>0;version--) {
             if((ataversion&(1<<version))!=0)
             break;
             }
@@ -2812,7 +2854,7 @@ void ata_detect( )
           for(i=0;i<20;i++){
             
write_byte(get_SS(),model+(i*2),read_byte(get_SS(),buffer+(i*2)+54+1));
             
write_byte(get_SS(),model+(i*2)+1,read_byte(get_SS(),buffer+(i*2)+54));
-            }
+          }
 
           // Reformat
           write_byte(get_SS(),model+40,0x00);
@@ -2820,7 +2862,13 @@ void ata_detect( )
             if(read_byte(get_SS(),model+i)==0x20)
               write_byte(get_SS(),model+i,0x00);
             else break;
+          }
+          if (i>36) {
+            write_byte(get_SS(),model+36,0x00);
+            for(i=35;i>32;i--){
+              write_byte(get_SS(),model+i,0x2E);
             }
+          }
           break;
         }
 
@@ -2828,10 +2876,10 @@ void ata_detect( )
         case ATA_TYPE_ATA:
           printf("ata%d %s: ",channel,slave?" slave":"master");
           i=0; while(c=read_byte(get_SS(),model+i++)) printf("%c",c);
-          if (sizeinmb < 1UL<<16)
-            printf(" ATA-%d Hard-Disk (%04u MBytes)\n",version,(Bit16u)sizeinmb);
-          else
-            printf(" ATA-%d Hard-Disk (%04u GBytes)\n",version,(Bit16u)(sizeinmb>>10));
+         if (sizeinmb < (1UL<<16))
+            printf(" ATA-%d Hard-Disk (%4u MBytes)\n", version, (Bit16u)sizeinmb);
+         else
+            printf(" ATA-%d Hard-Disk (%4u GBytes)\n", version, (Bit16u)(sizeinmb>>10));
           break;
         case ATA_TYPE_ATAPI:
           printf("ata%d %s: ",channel,slave?" slave":"master");
@@ -2852,17 +2900,17 @@ void ata_detect( )
   write_byte(ebda_seg,&EbdaData->ata.hdcount, hdcount);
   write_byte(ebda_seg,&EbdaData->ata.cdcount, cdcount);
   write_byte(0x40,0x75, hdcount);
- 
+
   printf("\n");
 
   // FIXME : should use bios=cmos|auto|disable bits
   // FIXME : should know about translation bits
-  // FIXME : move hard_drive_post here 
-  
+  // FIXME : move hard_drive_post here
+
 }
 
 // ---------------------------------------------------------------------------
-// ATA/ATAPI driver : software reset 
+// ATA/ATAPI driver : software reset
 // ---------------------------------------------------------------------------
 // ATA-3
 // 8.2.1 Software reset - Device 0
@@ -2872,7 +2920,8 @@ Bit16u device;
 {
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit16u iobase1, iobase2;
-  Bit8u  channel, slave, sn, sc; 
+  Bit8u  channel, slave, sn, sc;
+  Bit8u  type;
   Bit16u max;
 
   channel = device / 2;
@@ -2887,16 +2936,13 @@ Bit16u device;
   outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN | ATA_CB_DC_SRST);
 
 // 8.2.1 (b) -- wait for BSY
-  max=0xff;
-  while(--max>0) {
-    Bit8u status = inb(iobase1+ATA_CB_STAT);
-    if ((status & ATA_CB_STAT_BSY) != 0) break;
-  }
+  await_ide(BSY, iobase1, 20);
 
 // 8.2.1 (f) -- clear SRST
   outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
 
-  if (read_byte(ebda_seg,&EbdaData->ata.devices[device].type) != ATA_TYPE_NONE) {
+  type=read_byte(ebda_seg,&EbdaData->ata.devices[device].type);
+  if (type != ATA_TYPE_NONE) {
 
 // 8.2.1 (g) -- check for sc==sn==0x01
     // select device
@@ -2905,21 +2951,14 @@ Bit16u device;
     sn = inb(iobase1+ATA_CB_SN);
 
     if ( (sc==0x01) && (sn==0x01) ) {
+      if (type == ATA_TYPE_ATA) //ATA
+        await_ide(NOT_BSY_RDY, iobase1, IDE_TIMEOUT);
+      else //ATAPI
+        await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
+    }
 
 // 8.2.1 (h) -- wait for not BSY
-      max=0xff;
-      while(--max>0) {
-        Bit8u status = inb(iobase1+ATA_CB_STAT);
-        if ((status & ATA_CB_STAT_BSY) == 0) break;
-        }
-      }
-    }
-
-// 8.2.1 (i) -- wait for DRDY
-  max=0xfff;
-  while(--max>0) {
-    Bit8u status = inb(iobase1+ATA_CB_STAT);
-      if ((status & ATA_CB_STAT_RDY) != 0) break;
+    await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
   }
 
   // Enable interrupts
@@ -2927,7 +2966,7 @@ Bit16u device;
 }
 
 // ---------------------------------------------------------------------------
-// ATA/ATAPI driver : execute a non data command 
+// ATA/ATAPI driver : execute a non data command
 // ---------------------------------------------------------------------------
 
 Bit16u ata_cmd_non_data()
@@ -2945,9 +2984,9 @@ Bit16u ata_cmd_non_data()
       // 5 : more sectors to read/verify
       // 6 : no sectors left to write
       // 7 : more sectors to write
-Bit16u ata_cmd_data_in(device, command, count, cylinder, head, sector, lba, segment, offset)
+Bit16u ata_cmd_data_in(device, command, count, cylinder, head, sector, lba_low, lba_high, segment, offset)
 Bit16u device, command, count, cylinder, head, sector, segment, offset;
-Bit32u lba;
+Bit32u lba_low, lba_high;
 {
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit16u iobase1, iobase2, blksize;
@@ -2976,22 +3015,20 @@ Bit32u lba;
 
   // sector will be 0 only on lba access. Convert to lba-chs
   if (sector == 0) {
-    if ((count >= 1 << 8) || (lba + count >= 1UL << 28)) {
+    if ((count >= 1 << 8) || lba_high || (lba_low + count >= 1UL << 28)) {
       outb(iobase1 + ATA_CB_FR, 0x00);
       outb(iobase1 + ATA_CB_SC, (count >> 8) & 0xff);
-      outb(iobase1 + ATA_CB_SN, lba >> 24);
-      outb(iobase1 + ATA_CB_CL, 0);
-      outb(iobase1 + ATA_CB_CH, 0);
+      outb(iobase1 + ATA_CB_SN, lba_low >> 24);
+      outb(iobase1 + ATA_CB_CL, lba_high & 0xff);
+      outb(iobase1 + ATA_CB_CH, lba_high >> 8);
       command |= 0x04;
       count &= (1UL << 8) - 1;
-      lba &= (1UL << 24) - 1;
+      lba_low &= (1UL << 24) - 1;
       }
-    sector = (Bit16u) (lba & 0x000000ffL);
-    lba >>= 8;
-    cylinder = (Bit16u) (lba & 0x0000ffffL);
-    lba >>= 16;
-    head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
-    }
+    sector = (Bit16u) (lba_low & 0x000000ffL);
+    cylinder = (Bit16u) ((lba_low>>8) & 0x0000ffffL);
+    head = ((Bit16u) ((lba_low>>24) & 0x0000000fL)) | ATA_CB_DH_LBA;
+  }
 
   outb(iobase1 + ATA_CB_FR, 0x00);
   outb(iobase1 + ATA_CB_SC, count);
@@ -3001,10 +3038,8 @@ Bit32u lba;
   outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | 
(Bit8u) head );
   outb(iobase1 + ATA_CB_CMD, command);
 
-  while (1) {
-    status = inb(iobase1 + ATA_CB_STAT);
-    if ( !(status & ATA_CB_STAT_BSY) ) break;
-    }
+  await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+  status = inb(iobase1 + ATA_CB_STAT);
 
   if (status & ATA_CB_STAT_ERR) {
     BX_DEBUG_ATA("ata_cmd_data_in : read error\n");
@@ -3025,12 +3060,12 @@ ASM_START
 ASM_START
         push bp
         mov  bp, sp
-        mov  di, _ata_cmd_data_in.offset + 2[bp]  
-        mov  ax, _ata_cmd_data_in.segment + 2[bp] 
-        mov  cx, _ata_cmd_data_in.blksize + 2[bp] 
+        mov  di, _ata_cmd_data_in.offset + 2[bp]
+        mov  ax, _ata_cmd_data_in.segment + 2[bp]
+        mov  cx, _ata_cmd_data_in.blksize + 2[bp]
 
         ;; adjust if there will be an overrun. 2K max sector size
-        cmp   di, #0xf800 ;; 
+        cmp   di, #0xf800 ;;
         jbe   ata_in_no_adjust
 
 ata_in_adjust:
@@ -3042,7 +3077,7 @@ ata_in_no_adjust:
 
         mov   dx, _ata_cmd_data_in.iobase1 + 2[bp] ;; ATA data read port
 
-        mov  ah, _ata_cmd_data_in.mode + 2[bp] 
+        mov  ah, _ata_cmd_data_in.mode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_in_32
 
@@ -3064,9 +3099,10 @@ ASM_END
     current++;
     write_word(ebda_seg, &EbdaData->ata.trsfsectors,current);
     count--;
+    await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
     status = inb(iobase1 + ATA_CB_STAT);
     if (count == 0) {
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) ) 
+      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) )
           != ATA_CB_STAT_RDY ) {
         BX_DEBUG_ATA("ata_cmd_data_in : no sectors left (status %02x)\n", 
(unsigned) status);
         return 4;
@@ -3074,7 +3110,7 @@ ASM_END
       break;
       }
     else {
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) ) 
+      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) )
           != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
         BX_DEBUG_ATA("ata_cmd_data_in : more sectors left (status %02x)\n", 
(unsigned) status);
         return 5;
@@ -3099,9 +3135,9 @@ ASM_END
       // 5 : more sectors to read/verify
       // 6 : no sectors left to write
       // 7 : more sectors to write
-Bit16u ata_cmd_data_out(device, command, count, cylinder, head, sector, lba, segment, offset)
+Bit16u ata_cmd_data_out(device, command, count, cylinder, head, sector, lba_low, lba_high, segment, offset)
 Bit16u device, command, count, cylinder, head, sector, segment, offset;
-Bit32u lba;
+Bit32u lba_low, lba_high;
 {
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit16u iobase1, iobase2, blksize;
@@ -3130,22 +3166,20 @@ Bit32u lba;
 
   // sector will be 0 only on lba access. Convert to lba-chs
   if (sector == 0) {
-    if ((count >= 1 << 8) || (lba + count >= 1UL << 28)) {
+    if ((count >= 1 << 8) || lba_high || (lba_low + count >= 1UL << 28)) {
       outb(iobase1 + ATA_CB_FR, 0x00);
       outb(iobase1 + ATA_CB_SC, (count >> 8) & 0xff);
-      outb(iobase1 + ATA_CB_SN, lba >> 24);
-      outb(iobase1 + ATA_CB_CL, 0);
-      outb(iobase1 + ATA_CB_CH, 0);
+      outb(iobase1 + ATA_CB_SN, lba_low >> 24);
+      outb(iobase1 + ATA_CB_CL, lba_high & 0xff);
+      outb(iobase1 + ATA_CB_CH, lba_high >> 8);
       command |= 0x04;
       count &= (1UL << 8) - 1;
-      lba &= (1UL << 24) - 1;
+      lba_low &= (1UL << 24) - 1;
       }
-    sector = (Bit16u) (lba & 0x000000ffL);
-    lba >>= 8;
-    cylinder = (Bit16u) (lba & 0x0000ffffL);
-    lba >>= 16;
-    head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
-    }
+    sector = (Bit16u) (lba_low & 0x000000ffL);
+    cylinder = (Bit16u) ((lba_low>>8) & 0x0000ffffL);
+    head = ((Bit16u) ((lba_low>>24) & 0x0000000fL)) | ATA_CB_DH_LBA;
+  }
 
   outb(iobase1 + ATA_CB_FR, 0x00);
   outb(iobase1 + ATA_CB_SC, count);
@@ -3155,10 +3189,8 @@ Bit32u lba;
   outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | 
(Bit8u) head );
   outb(iobase1 + ATA_CB_CMD, command);
 
-  while (1) {
-    status = inb(iobase1 + ATA_CB_STAT);
-    if ( !(status & ATA_CB_STAT_BSY) ) break;
-    }
+  await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+  status = inb(iobase1 + ATA_CB_STAT);
 
   if (status & ATA_CB_STAT_ERR) {
     BX_DEBUG_ATA("ata_cmd_data_out : read error\n");
@@ -3179,12 +3211,12 @@ ASM_START
 ASM_START
         push bp
         mov  bp, sp
-        mov  si, _ata_cmd_data_out.offset + 2[bp]  
-        mov  ax, _ata_cmd_data_out.segment + 2[bp] 
-        mov  cx, _ata_cmd_data_out.blksize + 2[bp] 
+        mov  si, _ata_cmd_data_out.offset + 2[bp]
+        mov  ax, _ata_cmd_data_out.segment + 2[bp]
+        mov  cx, _ata_cmd_data_out.blksize + 2[bp]
 
         ;; adjust if there will be an overrun. 2K max sector size
-        cmp   si, #0xf800 ;; 
+        cmp   si, #0xf800 ;;
         jbe   ata_out_no_adjust
 
 ata_out_adjust:
@@ -3196,7 +3228,7 @@ ata_out_no_adjust:
 
         mov   dx, _ata_cmd_data_out.iobase1 + 2[bp] ;; ATA data write port
 
-        mov  ah, _ata_cmd_data_out.mode + 2[bp] 
+        mov  ah, _ata_cmd_data_out.mode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_out_32
 
@@ -3222,7 +3254,7 @@ ASM_END
     count--;
     status = inb(iobase1 + ATA_CB_STAT);
     if (count == 0) {
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | 
ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
+      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | 
ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
           != ATA_CB_STAT_RDY ) {
         BX_DEBUG_ATA("ata_cmd_data_out : no sectors left (status %02x)\n", 
(unsigned) status);
         return 6;
@@ -3230,7 +3262,7 @@ ASM_END
       break;
       }
     else {
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) ) 
+      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) )
           != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
         BX_DEBUG_ATA("ata_cmd_data_out : more sectors left (status %02x)\n", 
(unsigned) status);
         return 7;
@@ -3297,19 +3329,17 @@ Bit32u length;
   if (status & ATA_CB_STAT_BSY) return 2;
 
   outb(iobase2 + ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
-  // outb(iobase1 + ATA_CB_FR, 0x00);
-  // outb(iobase1 + ATA_CB_SC, 0x00);
-  // outb(iobase1 + ATA_CB_SN, 0x00);
+  outb(iobase1 + ATA_CB_FR, 0x00);
+  outb(iobase1 + ATA_CB_SC, 0x00);
+  outb(iobase1 + ATA_CB_SN, 0x00);
   outb(iobase1 + ATA_CB_CL, 0xfff0 & 0x00ff);
   outb(iobase1 + ATA_CB_CH, 0xfff0 >> 8);
   outb(iobase1 + ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
   outb(iobase1 + ATA_CB_CMD, ATA_CMD_PACKET);
 
   // Device should ok to receive command
-  while (1) {
-    status = inb(iobase1 + ATA_CB_STAT);
-    if ( !(status & ATA_CB_STAT_BSY) ) break;
-    }
+  await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+  status = inb(iobase1 + ATA_CB_STAT);
 
   if (status & ATA_CB_STAT_ERR) {
     BX_DEBUG_ATA("ata_cmd_packet : error, status is %02x\n",status);
@@ -3326,13 +3356,13 @@ Bit32u length;
   // Send command to device
 ASM_START
       sti  ;; enable higher priority interrupts
- 
+
       push bp
       mov  bp, sp
-    
-      mov  si, _ata_cmd_packet.cmdoff + 2[bp]  
-      mov  ax, _ata_cmd_packet.cmdseg + 2[bp] 
-      mov  cx, _ata_cmd_packet.cmdlen + 2[bp] 
+
+      mov  si, _ata_cmd_packet.cmdoff + 2[bp]
+      mov  ax, _ata_cmd_packet.cmdseg + 2[bp]
+      mov  cx, _ata_cmd_packet.cmdlen + 2[bp]
       mov  es, ax      ;; segment in es
 
       mov  dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data write port
@@ -3345,32 +3375,38 @@ ASM_END
 ASM_END
 
   if (inout == ATA_DATA_NO) {
+    await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
     status = inb(iobase1 + ATA_CB_STAT);
     }
   else {
+        Bit16u loops = 0;
+        Bit8u sc;
   while (1) {
 
+      if (loops == 0) {//first time through
+        status = inb(iobase2 + ATA_CB_ASTAT);
+        await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+      }
+      else
+        await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
+      loops++;
+
       status = inb(iobase1 + ATA_CB_STAT);
+      sc = inb(iobase1 + ATA_CB_SC);
 
       // Check if command completed
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_DRQ) ) ==0 ) break;
+      if(((inb(iobase1 + ATA_CB_SC)&0x7)==0x3) &&
+         ((status & (ATA_CB_STAT_RDY | ATA_CB_STAT_ERR)) == ATA_CB_STAT_RDY)) break;
 
       if (status & ATA_CB_STAT_ERR) {
         BX_DEBUG_ATA("ata_cmd_packet : error (status %02x)\n",status);
         return 3;
       }
 
-      // Device must be ready to send data
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) ) 
-            != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
-        BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", status);
-        return 4;
-        }
-
       // Normalize address
       bufseg += (bufoff / 16);
       bufoff %= 16;
-    
+
       // Get the byte count
       lcount =  ((Bit16u)(inb(iobase1 + ATA_CB_CH))<<8)+inb(iobase1 + 
ATA_CB_CL);
 
@@ -3431,10 +3467,10 @@ ASM_START
 
         mov  dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data read port
 
-        mov  cx, _ata_cmd_packet.lbefore + 2[bp] 
+        mov  cx, _ata_cmd_packet.lbefore + 2[bp]
         jcxz ata_packet_no_before
 
-        mov  ah, _ata_cmd_packet.lmode + 2[bp] 
+        mov  ah, _ata_cmd_packet.lmode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_packet_in_before_32
 
@@ -3451,14 +3487,14 @@ ata_packet_in_before_32_loop:
         pop  eax
 
 ata_packet_no_before:
-        mov  cx, _ata_cmd_packet.lcount + 2[bp] 
+        mov  cx, _ata_cmd_packet.lcount + 2[bp]
         jcxz ata_packet_after
 
-        mov  di, _ata_cmd_packet.bufoff + 2[bp]  
-        mov  ax, _ata_cmd_packet.bufseg + 2[bp] 
+        mov  di, _ata_cmd_packet.bufoff + 2[bp]
+        mov  ax, _ata_cmd_packet.bufseg + 2[bp]
         mov  es, ax
 
-        mov  ah, _ata_cmd_packet.lmode + 2[bp] 
+        mov  ah, _ata_cmd_packet.lmode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_packet_in_32
 
@@ -3472,10 +3508,10 @@ ata_packet_in_32:
           insd ;; CX dwords transfered to port(DX) to ES:[DI]
 
 ata_packet_after:
-        mov  cx, _ata_cmd_packet.lafter + 2[bp] 
+        mov  cx, _ata_cmd_packet.lafter + 2[bp]
         jcxz ata_packet_done
 
-        mov  ah, _ata_cmd_packet.lmode + 2[bp] 
+        mov  ah, _ata_cmd_packet.lmode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_packet_in_after_32
 
@@ -3505,7 +3541,7 @@ ASM_END
     }
 
   // Final check, device must be ready
-  if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | 
ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
+  if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | 
ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
          != ATA_CB_STAT_RDY ) {
     BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", (unsigned) 
status);
     return 4;
@@ -3524,55 +3560,108 @@ ASM_END
 // Start of ATA/ATAPI generic functions
 // ---------------------------------------------------------------------------
 
-  Bit16u 
-atapi_get_sense(device)
+  Bit16u
+atapi_get_sense(device, seg, asc, ascq)
   Bit16u device;
 {
   Bit8u  atacmd[12];
-  Bit8u  buffer[16];
+  Bit8u  buffer[18];
   Bit8u i;
 
   memsetb(get_SS(),atacmd,0,12);
 
-  // Request SENSE 
-  atacmd[0]=0x03;    
-  atacmd[4]=0x20;    
-  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 16L, ATA_DATA_IN, 
get_SS(), buffer) != 0)
+  // Request SENSE
+  atacmd[0]=ATA_CMD_REQUEST_SENSE;
+  atacmd[4]=sizeof(buffer);
+  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 18L, ATA_DATA_IN, get_SS(), buffer) != 0)
     return 0x0002;
 
-  if ((buffer[0] & 0x7e) == 0x70) {
-    return (((Bit16u)buffer[2]&0x0f)*0x100)+buffer[12];
-    }
+  write_byte(seg,asc,buffer[12]);
+  write_byte(seg,ascq,buffer[13]);
 
   return 0;
 }
 
-  Bit16u 
+  Bit16u
 atapi_is_ready(device)
   Bit16u device;
 {
-  Bit8u  atacmd[12];
-  Bit8u  buffer[];
-
-  memsetb(get_SS(),atacmd,0,12);
- 
-  // Test Unit Ready
-  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, 
get_SS(), buffer) != 0)
-    return 0x000f;
-
-  if (atapi_get_sense(device) !=0 ) {
-    memsetb(get_SS(),atacmd,0,12);
-
-    // try to send Test Unit Ready again
-    if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, 
get_SS(), buffer) != 0)
-      return 0x000f;
-
-    return atapi_get_sense(device);
+  Bit8u packet[12];
+  Bit8u buf[8];
+  Bit32u block_len;
+  Bit32u sectors;
+  Bit32u timeout; //measured in ms
+  Bit32u time;
+  Bit8u asc, ascq;
+  Bit8u in_progress;
+  Bit16u ebda_seg = read_word(0x0040,0x000E);
+  if (read_byte(ebda_seg,&EbdaData->ata.devices[device].type) != ATA_TYPE_ATAPI) {
+    printf("not implemented for non-ATAPI device\n");
+    return -1;
+  }
+
+  BX_DEBUG_ATA("ata_detect_medium: begin\n");
+  memsetb(get_SS(),packet, 0, sizeof packet);
+  packet[0] = 0x25; /* READ CAPACITY */
+
+  /* Retry READ CAPACITY 50 times unless MEDIUM NOT PRESENT
+   * is reported by the device. If the device reports "IN PROGRESS",
+   * 30 seconds is added. */
+  timeout = 5000;
+  time = 0;
+  in_progress = 0;
+  while (time < timeout) {
+    if (ata_cmd_packet(device, sizeof(packet), get_SS(), packet, 0, 8L, ATA_DATA_IN, get_SS(), buf) == 0)
+      goto ok;
+
+    if (atapi_get_sense(device, get_SS(), &asc, &ascq) == 0) {
+      if (asc == 0x3a) { /* MEDIUM NOT PRESENT */
+        BX_DEBUG_ATA("Device reports MEDIUM NOT PRESENT\n");
+        return -1;
+      }
+
+      if (asc == 0x04 && ascq == 0x01 && !in_progress) {
+        /* IN PROGRESS OF BECOMING READY */
+        printf("Waiting for device to detect medium... ");
+        /* Allow 30 seconds more */
+        timeout = 30000;
+        in_progress = 1;
+      }
     }
+    time += 100;
+  }
+  BX_DEBUG_ATA("read capacity failed\n");
+  return -1;
+ok:
+
+  block_len = (Bit32u) buf[4] << 24
+    | (Bit32u) buf[5] << 16
+    | (Bit32u) buf[6] << 8
+    | (Bit32u) buf[7] << 0;
+  BX_DEBUG_ATA("block_len=%u\n", block_len);
+
+  if (block_len!= 2048 && block_len!= 512)
+  {
+    printf("Unsupported sector size %u\n", block_len);
+    return -1;
+  }
+  write_dword(ebda_seg,&EbdaData->ata.devices[device].blksize, block_len);
+
+  sectors = (Bit32u) buf[0] << 24
+    | (Bit32u) buf[1] << 16
+    | (Bit32u) buf[2] << 8
+    | (Bit32u) buf[3] << 0;
+
+  BX_DEBUG_ATA("sectors=%u\n", sectors);
+  if (block_len == 2048)
+    sectors <<= 2; /* # of sectors in 512-byte "soft" sector */
+  if (sectors != read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low))
+    printf("%dMB medium detected\n", sectors>>(20-9));
+  write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low, sectors);
   return 0;
 }
 
-  Bit16u 
+  Bit16u
 atapi_is_cdrom(device)
   Bit8u device;
 {
@@ -3632,7 +3721,7 @@ static char eltorito[24]="EL TORITO SPEC
 //
 // Returns ah: emulated drive, al: error code
 //
-  Bit16u 
+  Bit16u
 cdrom_boot()
 {
   Bit16u ebda_seg=read_word(0x0040,0x000E);
@@ -3645,9 +3734,12 @@ cdrom_boot()
   for (device=0; device<BX_MAX_ATA_DEVICES;device++) {
     if (atapi_is_cdrom(device)) break;
     }
-  
+
   // if not found
   if(device >= BX_MAX_ATA_DEVICES) return 2;
+
+  if(error = atapi_is_ready(device) != 0)
+    BX_INFO("ata_is_ready returned %d\n",error);
 
   // Read the Boot Record Volume Descriptor
   memsetb(get_SS(),atacmd,0,12);
@@ -3668,7 +3760,7 @@ cdrom_boot()
    }
   for(i=0;i<23;i++)
     if(buffer[7+i]!=read_byte(0xf000,&eltorito[i]))return 6;
-  
+
   // ok, now we calculate the Boot catalog address
   
lba=buffer[0x4A]*0x1000000+buffer[0x49]*0x10000+buffer[0x48]*0x100+buffer[0x47];
 
@@ -3683,7 +3775,7 @@ cdrom_boot()
   atacmd[5]=(lba & 0x000000ff);
   if((error = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 2048L, 
ATA_DATA_IN, get_SS(), buffer)) != 0)
     return 7;
- 
+
   // Validation entry
   if(buffer[0x00]!=0x01)return 8;   // Header
   if(buffer[0x01]!=0x00)return 9;   // Platform
@@ -3702,10 +3794,10 @@ cdrom_boot()
 
   write_byte(ebda_seg,&EbdaData->cdemu.media,buffer[0x21]);
   if(buffer[0x21]==0){
-    // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0. 
+    // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0.
     // Win2000 cd boot needs to know it booted from cd
     write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0xE0);
-    } 
+    }
   else if(buffer[0x21]<4)
     write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0x00);
   else
@@ -3719,7 +3811,7 @@ cdrom_boot()
 
   write_word(ebda_seg,&EbdaData->cdemu.load_segment,boot_segment);
   write_word(ebda_seg,&EbdaData->cdemu.buffer_segment,0x0000);
-  
+
   nbsectors=buffer[0x27]*0x100+buffer[0x26];
   write_word(ebda_seg,&EbdaData->cdemu.sector_count,nbsectors);
 
@@ -3743,7 +3835,6 @@ cdrom_boot()
   /* measure 1st 512 bytes  */
   tcpa_ipl((Bit32u)1L,(Bit32u)boot_segment,(Bit32u)0L,(Bit32u)512L);
 #endif
-
 
   // Remember the media type
   switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
@@ -3765,7 +3856,7 @@ cdrom_boot()
     case 0x04:  // Harddrive
       
write_word(ebda_seg,&EbdaData->cdemu.vdevice.spt,read_byte(boot_segment,446+6)&0x3f);
       write_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders,
-             (read_byte(boot_segment,446+6)<<2) + read_byte(boot_segment,446+7) + 1);
+              (read_byte(boot_segment,446+6)<<2) + read_byte(boot_segment,446+7) + 1);
       write_word(ebda_seg,&EbdaData->cdemu.vdevice.heads,read_byte(boot_segment,446+5) + 1);
       break;
    }
@@ -3778,7 +3869,7 @@ cdrom_boot()
       write_byte(ebda_seg, &EbdaData->ata.hdcount, read_byte(ebda_seg, 
&EbdaData->ata.hdcount) + 1);
    }
 
-  
+
   // everything is ok, so from now on, the emulation is active
   if(read_byte(ebda_seg,&EbdaData->cdemu.media)!=0)
     write_byte(ebda_seg,&EbdaData->cdemu.active,0x01);
@@ -4124,9 +4215,10 @@ ASM_END
       regs.u.r8.al = inb_cmos(0x30);
       regs.u.r8.ah = inb_cmos(0x31);
 
-      // limit to 15M
-      if(regs.u.r16.ax > 0x3c00)
-        regs.u.r16.ax = 0x3c00;
+      // According to Ralf Brown's interrupt the limit should be 15M,
+      // but real machines mostly return max. 63M.
+      if(regs.u.r16.ax > 0xffc0)
+        regs.u.r16.ax = 0xffc0;
 
       CLEAR_CF();
 #endif
@@ -4344,13 +4436,35 @@ BX_DEBUG_INT15("case 2:\n");
 
         case 3: // Set Resolution
 BX_DEBUG_INT15("case 3:\n");
-          // BX:
+          // BH:
           //      0 =  25 dpi, 1 count  per millimeter
           //      1 =  50 dpi, 2 counts per millimeter
           //      2 = 100 dpi, 4 counts per millimeter
           //      3 = 200 dpi, 8 counts per millimeter
-          CLEAR_CF();
-          regs.u.r8.ah = 0;
+          comm_byte = inhibit_mouse_int_and_events(); // disable IRQ12 and packets
+          if (regs.u.r8.bh < 4) {
+            ret = send_to_mouse_ctrl(0xE8); // set resolution command
+            if (ret == 0) {
+              ret = get_mouse_data(&mouse_data1);
+              if (mouse_data1 != 0xfa)
+                BX_PANIC("Mouse status returned %02x (should be ack)\n", (unsigned)mouse_data1);
+              ret = send_to_mouse_ctrl(regs.u.r8.bh);
+              ret = get_mouse_data(&mouse_data1);
+              if (mouse_data1 != 0xfa)
+                BX_PANIC("Mouse status returned %02x (should be ack)\n", (unsigned)mouse_data1);
+              CLEAR_CF();
+              regs.u.r8.ah = 0;
+            } else {
+              // error
+              SET_CF();
+              regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+            }
+          } else {
+            // error
+            SET_CF();
+            regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+          }
+          set_kbd_command_byte(comm_byte); // restore IRQ12 and serial enable
           break;
 
         case 4: // Get Device ID
@@ -4472,7 +4586,30 @@ BX_DEBUG_INT15("case default:\n");
       break;
     }
 }
-#endif
+#endif // BX_USE_PS2_MOUSE
+
+
+void set_e820_range(ES, DI, start, end, type)
+     Bit16u ES;
+     Bit16u DI;
+     Bit32u start;
+     Bit32u end;
+     Bit16u type;
+{
+    write_word(ES, DI, start);
+    write_word(ES, DI+2, start >> 16);
+    write_word(ES, DI+4, 0x00);
+    write_word(ES, DI+6, 0x00);
+
+    end -= start;
+    write_word(ES, DI+8, end);
+    write_word(ES, DI+10, end >> 16);
+    write_word(ES, DI+12, 0x0000);
+    write_word(ES, DI+14, 0x0000);
+
+    write_word(ES, DI+16, type);
+    write_word(ES, DI+18, 0x0);
+}
 
   void
 int15_function32(regs, ES, DS, FLAGS)
@@ -4486,17 +4623,20 @@ BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.
 
   switch (regs.u.r8.ah) {
     case 0x86:
-      // Wait for CX:DX microseconds. currently using the 
-      // refresh request port 0x61 bit4, toggling every 15usec 
+      // Wait for CX:DX microseconds. currently using the
+      // refresh request port 0x61 bit4, toggling every 15usec
 
       CX = regs.u.r16.cx;
       DX = regs.u.r16.dx;
 
 ASM_START
+      sti
+
       ;; Get the count in eax
-      mov  ax, .int15_function32.CX [bp]
+      mov  bx, sp
+      mov  ax, _int15_function32.CX [bx]
       shl  eax, #16
-      mov  ax, .int15_function32.DX [bp]
+      mov  ax, _int15_function32.DX [bx]
 
       ;; convert to numbers of 15usec ticks
       mov ebx, #15
@@ -4527,7 +4667,8 @@ ASM_END
     case 0xe8:
         switch(regs.u.r8.al)
         {
-        case 0x20: {
+#ifdef HVMASSIST
+       case 0x20: {
             Bit16u e820_table_size = read_word(0xe000, 0x8) * 0x14;
 
             if (regs.u.r32.edx != 0x534D4150) /* SMAP */
@@ -4575,7 +4716,7 @@ ASM_END
             // Get the amount of extended memory (above 1M)
             regs.u.r8.cl = inb_cmos(0x30);
             regs.u.r8.ch = inb_cmos(0x31);
-          
+
             // limit to 15M
             if (regs.u.r16.cx > (15*1024))
                 regs.u.r16.cx = 15*1024;
@@ -4603,7 +4744,7 @@ ASM_END
             regs.u.r16.bx = regs.u.r16.dx;
             break;
         }
-       default:  /* AH=0xE8?? but not implemented */
+        default:  /* AH=0xE8?? but not implemented */
             goto int15_unimplemented;
         }
         break;
@@ -4616,16 +4757,178 @@ ASM_END
       regs.u.r8.ah = UNSUPPORTED_FUNCTION;
       break;
     }
+#else
+         case 0x20: // coded by osmaker aka K.J.
+            if(regs.u.r32.edx == 0x534D4150)
+            {
+                extended_memory_size = inb_cmos(0x35);
+                extended_memory_size <<= 8;
+                extended_memory_size |= inb_cmos(0x34);
+                extended_memory_size *= 64;
+                // greater than EFF00000???
+                if(extended_memory_size > 0x3bc000) {
+                    extended_memory_size = 0x3bc000; // everything after this is reserved memory until we get to 0x100000000
+                }
+                extended_memory_size *= 1024;
+                extended_memory_size += (16L * 1024 * 1024);
+
+                if(extended_memory_size <= (16L * 1024 * 1024)) {
+                    extended_memory_size = inb_cmos(0x31);
+                    extended_memory_size <<= 8;
+                    extended_memory_size |= inb_cmos(0x30);
+                    extended_memory_size *= 1024;
+                    extended_memory_size += (1L * 1024 * 1024);
+                }
+
+                switch(regs.u.r16.bx)
+                {
+                    case 0:
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x0000000L, 0x0009f000L, 1);
+                        regs.u.r32.ebx = 1;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 1:
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x0009f000L, 0x000a0000L, 2);
+                        regs.u.r32.ebx = 2;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 2:
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x000e8000L, 0x00100000L, 2);
+                        regs.u.r32.ebx = 3;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 3:
+#if BX_ROMBIOS32
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x00100000L,
+                                       extended_memory_size - ACPI_DATA_SIZE, 1);
+                        regs.u.r32.ebx = 4;
+#else
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x00100000L,
+                                       extended_memory_size, 1);
+                        regs.u.r32.ebx = 5;
+#endif
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 4:
+                        set_e820_range(ES, regs.u.r16.di,
+                                       extended_memory_size - ACPI_DATA_SIZE,
+                                       extended_memory_size, 3); // ACPI RAM
+                        regs.u.r32.ebx = 5;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 5:
+                        /* 256KB BIOS area at the end of 4 GB */
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0xfffc0000L, 0x00000000L, 2);
+                        regs.u.r32.ebx = 0;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                    default:  /* AX=E820, DX=534D4150, BX unrecognized */
+                        goto int15_unimplemented;
+                        break;
+                }
+            } else {
+              // if DX != 0x534D4150)
+              goto int15_unimplemented;
+            }
+            break;
+
+        case 0x01:
+          // do we have any reason to fail here ?
+          CLEAR_CF();
+
+          // my real system sets ax and bx to 0
+          // this is confirmed by Ralph Brown list
+          // but syslinux v1.48 is known to behave
+          // strangely if ax is set to 0
+          // regs.u.r16.ax = 0;
+          // regs.u.r16.bx = 0;
+
+          // Get the amount of extended memory (above 1M)
+          regs.u.r8.cl = inb_cmos(0x30);
+          regs.u.r8.ch = inb_cmos(0x31);
+
+          // limit to 15M
+          if(regs.u.r16.cx > 0x3c00)
+          {
+            regs.u.r16.cx = 0x3c00;
+          }
+
+          // Get the amount of extended memory above 16M in 64k blocs
+          regs.u.r8.dl = inb_cmos(0x34);
+          regs.u.r8.dh = inb_cmos(0x35);
+
+          // Set configured memory equal to extended memory
+          regs.u.r16.ax = regs.u.r16.cx;
+          regs.u.r16.bx = regs.u.r16.dx;
+          break;
+        default:  /* AH=0xE8?? but not implemented */
+          goto int15_unimplemented;
+       }
+       break;
+    int15_unimplemented:
+       // fall into the default
+    default:
+      BX_INFO("*** int 15h function AX=%04x, BX=%04x not yet supported!\n",
+        (unsigned) regs.u.r16.ax, (unsigned) regs.u.r16.bx);
+      SET_CF();
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      break;
+    }
+#endif /* HVMASSIST */
 }
 
   void
 int16_function(DI, SI, BP, SP, BX, DX, CX, AX, FLAGS)
   Bit16u DI, SI, BP, SP, BX, DX, CX, AX, FLAGS;
 {
-  Bit8u scan_code, ascii_code, shift_flags, count;
+  Bit8u scan_code, ascii_code, shift_flags, led_flags, count;
   Bit16u kbd_code, max;
 
   BX_DEBUG_INT16("int16: AX=%04x BX=%04x CX=%04x DX=%04x \n", AX, BX, CX, DX);
+
+  shift_flags = read_byte(0x0040, 0x17);
+  led_flags = read_byte(0x0040, 0x97);
+  if ((((shift_flags >> 4) & 0x07) ^ (led_flags & 0x07)) != 0) {
+ASM_START
+    cli
+ASM_END
+    outb(0x60, 0xed);
+    while ((inb(0x64) & 0x01) == 0) outb(0x80, 0x21);
+    if ((inb(0x60) == 0xfa)) {
+      led_flags &= 0xf8;
+      led_flags |= ((shift_flags >> 4) & 0x07);
+      outb(0x60, led_flags & 0x07);
+      while ((inb(0x64) & 0x01) == 0) outb(0x80, 0x21);
+      inb(0x60);
+      write_byte(0x0040, 0x97, led_flags);
+    }
+ASM_START
+    sti
+ASM_END
+  }
 
   switch (GET_AH()) {
     case 0x00: /* read keyboard input */
@@ -4664,7 +4967,7 @@ int16_function(DI, SI, BP, SP, BX, DX, C
       break;
 
     case 0x09: /* GET KEYBOARD FUNCTIONALITY */
-      // bit Bochs Description     
+      // bit Bochs Description
       //  7    0   reserved
       //  6    0   INT 16/AH=20h-22h supported (122-key keyboard support)
       //  5    1   INT 16/AH=10h-12h supported (enhanced keyboard support)
@@ -4694,7 +4997,7 @@ int16_function(DI, SI, BP, SP, BX, DX, C
               kbd_code |= (inb(0x60) << 8);
             }
           } while (--count>0);
-       }
+        }
       }
       BX=kbd_code;
       break;
@@ -4721,7 +5024,8 @@ int16_function(DI, SI, BP, SP, BX, DX, C
     case 0x12: /* get extended keyboard status */
       shift_flags = read_byte(0x0040, 0x17);
       SET_AL(shift_flags);
-      shift_flags = read_byte(0x0040, 0x18);
+      shift_flags = read_byte(0x0040, 0x18) & 0x73;
+      shift_flags |= read_byte(0x0040, 0x96) & 0x0c;
       SET_AH(shift_flags);
       BX_DEBUG_INT16("int16: func 12 sending %04x\n",AX);
       break;
@@ -4736,7 +5040,7 @@ int16_function(DI, SI, BP, SP, BX, DX, C
 
     case 0x6F:
       if (GET_AL() == 0x08)
-       SET_AH(0x02); // unsupported, aka normal keyboard
+        SET_AH(0x02); // unsupported, aka normal keyboard
 
     default:
       BX_INFO("KBD: unsupported int 16h function %02x\n", GET_AH());
@@ -4877,7 +5181,7 @@ int09_function(DI, SI, BP, SP, BX, DX, C
   Bit16u DI, SI, BP, SP, BX, DX, CX, AX;
 {
   Bit8u scancode, asciicode, shift_flags;
-  Bit8u mf2_flags, mf2_state, led_flags;
+  Bit8u mf2_flags, mf2_state;
 
   //
   // DS has been set to F000 before call
@@ -4895,7 +5199,6 @@ int09_function(DI, SI, BP, SP, BX, DX, C
   shift_flags = read_byte(0x0040, 0x17);
   mf2_flags = read_byte(0x0040, 0x18);
   mf2_state = read_byte(0x0040, 0x96);
-  led_flags = read_byte(0x0040, 0x97);
   asciicode = 0;
 
   switch (scancode) {
@@ -4904,8 +5207,6 @@ int09_function(DI, SI, BP, SP, BX, DX, C
       write_byte(0x0040, 0x17, shift_flags);
       mf2_flags |= 0x40;
       write_byte(0x0040, 0x18, mf2_flags);
-      led_flags ^= 0x04;
-      write_byte(0x0040, 0x97, led_flags);
       break;
     case 0xba: /* Caps Lock release */
       mf2_flags &= ~0x40;
@@ -4913,11 +5214,8 @@ int09_function(DI, SI, BP, SP, BX, DX, C
       break;
 
     case 0x2a: /* L Shift press */
-      /*shift_flags &= ~0x40;*/
       shift_flags |= 0x02;
       write_byte(0x0040, 0x17, shift_flags);
-      led_flags &= ~0x04;
-      write_byte(0x0040, 0x97, led_flags);
       break;
     case 0xaa: /* L Shift release */
       shift_flags &= ~0x02;
@@ -4925,11 +5223,8 @@ int09_function(DI, SI, BP, SP, BX, DX, C
       break;
 
     case 0x36: /* R Shift press */
-      /*shift_flags &= ~0x40;*/
       shift_flags |= 0x01;
       write_byte(0x0040, 0x17, shift_flags);
-      led_flags &= ~0x04;
-      write_byte(0x0040, 0x97, led_flags);
       break;
     case 0xb6: /* R Shift release */
       shift_flags &= ~0x01;
@@ -4937,71 +5232,75 @@ int09_function(DI, SI, BP, SP, BX, DX, C
       break;
 
     case 0x1d: /* Ctrl press */
-      shift_flags |= 0x04;
-      write_byte(0x0040, 0x17, shift_flags);
-      if (mf2_state & 0x01) {
-        mf2_flags |= 0x04;
-      } else {
-        mf2_flags |= 0x01;
+      if ((mf2_state & 0x01) == 0) {
+        shift_flags |= 0x04;
+        write_byte(0x0040, 0x17, shift_flags);
+        if (mf2_state & 0x02) {
+          mf2_state |= 0x04;
+          write_byte(0x0040, 0x96, mf2_state);
+        } else {
+          mf2_flags |= 0x01;
+          write_byte(0x0040, 0x18, mf2_flags);
         }
-      write_byte(0x0040, 0x18, mf2_flags);
+      }
       break;
     case 0x9d: /* Ctrl release */
-      shift_flags &= ~0x04;
-      write_byte(0x0040, 0x17, shift_flags);
-      if (mf2_state & 0x01) {
-        mf2_flags &= ~0x04;
-      } else {
-        mf2_flags &= ~0x01;
+      if ((mf2_state & 0x01) == 0) {
+        shift_flags &= ~0x04;
+        write_byte(0x0040, 0x17, shift_flags);
+        if (mf2_state & 0x02) {
+          mf2_state &= ~0x04;
+          write_byte(0x0040, 0x96, mf2_state);
+        } else {
+          mf2_flags &= ~0x01;
+          write_byte(0x0040, 0x18, mf2_flags);
         }
-      write_byte(0x0040, 0x18, mf2_flags);
+      }
       break;
 
     case 0x38: /* Alt press */
       shift_flags |= 0x08;
       write_byte(0x0040, 0x17, shift_flags);
-      if (mf2_state & 0x01) {
-        mf2_flags |= 0x08;
+      if (mf2_state & 0x02) {
+        mf2_state |= 0x08;
+        write_byte(0x0040, 0x96, mf2_state);
       } else {
         mf2_flags |= 0x02;
-        }
-      write_byte(0x0040, 0x18, mf2_flags);
+        write_byte(0x0040, 0x18, mf2_flags);
+      }
       break;
     case 0xb8: /* Alt release */
       shift_flags &= ~0x08;
       write_byte(0x0040, 0x17, shift_flags);
-      if (mf2_state & 0x01) {
-        mf2_flags &= ~0x08;
+      if (mf2_state & 0x02) {
+        mf2_state &= ~0x08;
+        write_byte(0x0040, 0x96, mf2_state);
       } else {
         mf2_flags &= ~0x02;
-        }
-      write_byte(0x0040, 0x18, mf2_flags);
+        write_byte(0x0040, 0x18, mf2_flags);
+      }
       break;
 
     case 0x45: /* Num Lock press */
-      if ((mf2_state & 0x01) == 0) {
+      if ((mf2_state & 0x03) == 0) {
         mf2_flags |= 0x20;
         write_byte(0x0040, 0x18, mf2_flags);
         shift_flags ^= 0x20;
-        led_flags ^= 0x02;
         write_byte(0x0040, 0x17, shift_flags);
-        write_byte(0x0040, 0x97, led_flags);
-        }
+      }
       break;
     case 0xc5: /* Num Lock release */
-      if ((mf2_state & 0x01) == 0) {
+      if ((mf2_state & 0x03) == 0) {
         mf2_flags &= ~0x20;
         write_byte(0x0040, 0x18, mf2_flags);
-        }
+      }
       break;
 
     case 0x46: /* Scroll Lock press */
       mf2_flags |= 0x10;
       write_byte(0x0040, 0x18, mf2_flags);
       shift_flags ^= 0x10;
-      led_flags ^= 0x01;
       write_byte(0x0040, 0x17, shift_flags);
-      write_byte(0x0040, 0x97, led_flags);
       break;
 
     case 0xc6: /* Scroll Lock release */
@@ -5014,50 +5313,55 @@ int09_function(DI, SI, BP, SP, BX, DX, C
             machine_reset();
         /* Fall through */
     default:
-      if (scancode & 0x80) return; /* toss key releases ... */
+      if (scancode & 0x80) {
+        break; /* toss key releases ... */
+      }
       if (scancode > MAX_SCAN_CODE) {
-        BX_INFO("KBD: int09h_handler(): unknown scancode (%x) read!\n", scancode);
+        BX_INFO("KBD: int09h_handler(): unknown scancode read: 0x%02x!\n", scancode);
         return;
-        }
+      }
       if (shift_flags & 0x08) { /* ALT */
         asciicode = scan_to_scanascii[scancode].alt;
         scancode = scan_to_scanascii[scancode].alt >> 8;
-        }
-      else if (shift_flags & 0x04) { /* CONTROL */
+      } else if (shift_flags & 0x04) { /* CONTROL */
         asciicode = scan_to_scanascii[scancode].control;
         scancode = scan_to_scanascii[scancode].control >> 8;
-        }
-      else if (shift_flags & 0x03) { /* LSHIFT + RSHIFT */
-        /* check if lock state should be ignored 
+      } else if (((mf2_state & 0x02) > 0) && ((scancode >= 0x47) && (scancode <= 0x53))) {
+        /* extended keys handling */
+        asciicode = 0xe0;
+        scancode = scan_to_scanascii[scancode].normal >> 8;
+      } else if (shift_flags & 0x03) { /* LSHIFT + RSHIFT */
+        /* check if lock state should be ignored
          * because a SHIFT key are pressed */
-         
+
         if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
           asciicode = scan_to_scanascii[scancode].normal;
           scancode = scan_to_scanascii[scancode].normal >> 8;
-          }
-        else {
+        } else {
           asciicode = scan_to_scanascii[scancode].shift;
           scancode = scan_to_scanascii[scancode].shift >> 8;
-          }
         }
-      else {
+      } else {
         /* check if lock is on */
         if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
           asciicode = scan_to_scanascii[scancode].shift;
           scancode = scan_to_scanascii[scancode].shift >> 8;
-          }
-        else {
+        } else {
           asciicode = scan_to_scanascii[scancode].normal;
           scancode = scan_to_scanascii[scancode].normal >> 8;
-          }
         }
+      }
       if (scancode==0 && asciicode==0) {
         BX_INFO("KBD: int09h_handler(): scancode & asciicode are zero?\n");
-        }
+      }
       enqueue_key(scancode, asciicode);
       break;
-    }
-  mf2_state &= ~0x01;
+  }
+  if ((scancode & 0x7f) != 0x1d) {
+    mf2_state &= ~0x01;
+  }
+  mf2_state &= ~0x02;
+  write_byte(0x0040, 0x96, mf2_state);
 }
 
   unsigned int
@@ -5065,9 +5369,6 @@ enqueue_key(scan_code, ascii_code)
   Bit8u scan_code, ascii_code;
 {
   Bit16u buffer_start, buffer_end, buffer_head, buffer_tail, temp_tail;
-
-  //BX_INFO("KBD:   enqueue_key() called scan:%02x, ascii:%02x\n",
-  //    scan_code, ascii_code);
 
 #if BX_CPU < 2
   buffer_start = 0x001E;
@@ -5118,9 +5419,8 @@ BX_DEBUG_INT74("int74: read byte %02x\n"
   mouse_flags_2 = read_byte(ebda_seg, 0x0027);
 
   if ( (mouse_flags_2 & 0x80) != 0x80 ) {
-      //    BX_PANIC("int74_function:\n");
       return;
-    }
+  }
 
   package_count = mouse_flags_2 & 0x07;
   index = mouse_flags_1 & 0x07;
@@ -5148,10 +5448,10 @@ BX_DEBUG_INT74("int74_function: make_far
 #if BX_USE_ATADRV
 
   void
-int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
-  Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+int13_harddisk(EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+  Bit16u EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
 {
-  Bit32u lba;
+  Bit32u lba_low, lba_high;
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit16u cylinder, head, sector;
   Bit16u segment, offset;
@@ -5172,12 +5472,12 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
   // Get the ata channel
   device=read_byte(ebda_seg,&EbdaData->ata.hdidmap[GET_ELDL()-0x80]);
 
-  // basic check : device has to be valid 
+  // basic check : device has to be valid
   if (device >= BX_MAX_ATA_DEVICES) {
     BX_INFO("int13_harddisk: function %02x, unmapped device for ELDL=%02x\n", GET_AH(), GET_ELDL());
     goto int13_fail;
     }
-  
+
   switch (GET_AH()) {
 
     case 0x00: /* disk controller reset */
@@ -5195,7 +5495,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       break;
 
     case 0x02: // read disk sectors
-    case 0x03: // write disk sectors 
+    case 0x03: // write disk sectors
     case 0x04: // verify disk sectors
 
       count       = GET_AL();
@@ -5207,10 +5507,10 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       segment = ES;
       offset  = BX;
 
-      if ( (count > 128) || (count == 0) ) {
-        BX_INFO("int13_harddisk: function %02x, count out of range!\n",GET_AH());
+      if ((count > 128) || (count == 0) || (sector == 0)) {
+        BX_INFO("int13_harddisk: function %02x, parameter out of range!\n",GET_AH());
         goto int13_fail;
-        }
+      }
 
       nlc   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
       nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
@@ -5221,7 +5521,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
         BX_INFO("int13_harddisk: function %02x, parameters out of range %04x/%04x/%04x!\n", GET_AH(), cylinder, head, sector);
         goto int13_fail;
         }
-      
+
       // FIXME verify
       if ( GET_AH() == 0x04 ) goto int13_success;
 
@@ -5230,14 +5530,15 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
 
       // if needed, translate lchs to lba, and execute command
       if ( (nph != nlh) || (npspt != nlspt)) {
-        lba = ((((Bit32u)cylinder * (Bit32u)nlh) + (Bit32u)head) * (Bit32u)nlspt) + (Bit32u)sector - 1;
+        lba_low = ((((Bit32u)cylinder * (Bit32u)nlh) + (Bit32u)head) * (Bit32u)nlspt) + (Bit32u)sector - 1;
+        lba_high = 0;
         sector = 0; // this forces the command to be lba
         }
 
       if ( GET_AH() == 0x02 )
-        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, cylinder, head, sector, lba, segment, offset);
+        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, cylinder, head, sector, lba_low, lba_high, segment, offset);
       else
-        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, cylinder, head, sector, lba, segment, offset);
+        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, cylinder, head, sector, lba_low, lba_high, segment, offset);
 
       // Set nb of sector transferred
       SET_AL(read_word(ebda_seg, &EbdaData->ata.trsfsectors));
@@ -5258,7 +5559,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       break;
 
     case 0x08: /* read disk drive parameters */
-      
+
       // Get logical geometry from table
       nlc   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
       nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
@@ -5273,13 +5574,13 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       SET_DL(count); /* FIXME returns 0, 1, or n hard drives */
 
       // FIXME should set ES & DI
-      
+
       goto int13_success;
       break;
 
     case 0x10: /* check drive ready */
       // should look at 40:8E also???
-      
+
       // Read the status from controller
       status = inb(read_word(ebda_seg, &EbdaData->ata.channels[device/2].iobase1) + ATA_CB_STAT);
       if ( (status & ( ATA_CB_STAT_BSY | ATA_CB_STAT_RDY )) == ATA_CB_STAT_RDY ) {
@@ -5293,15 +5594,15 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
 
     case 0x15: /* read disk drive size */
 
-      // Get physical geometry from table
-      npc   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.cylinders);
-      nph   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
-      npspt = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
+      // Get logical geometry from table
+      nlc   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
+      nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
+      nlspt = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.spt);
 
       // Compute sector count seen by int13
-      lba = (Bit32u)(npc - 1) * (Bit32u)nph * (Bit32u)npspt;
-      CX = lba >> 16;
-      DX = lba & 0xffff;
+      lba_low = (Bit32u)(nlc - 1) * (Bit32u)nlh * (Bit32u)nlspt;
+      CX = lba_low >> 16;
+      DX = lba_low & 0xffff;
 
       SET_AH(3);  // hard disk accessible
       goto int13_success_noah;
@@ -5322,30 +5623,31 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
       segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
       offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
- 
-      // Can't use 64 bits lba
-      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
-      if (lba != 0L) {
-        BX_PANIC("int13_harddisk: function %02x. Can't use 64bits lba\n",GET_AH());
-        goto int13_fail;
-        }
-
-      // Get 32 bits lba and check
-      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
-      if (lba >= read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors) ) {
+
+      // Get 32 msb lba and check
+      lba_high=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
+      if (lba_high > read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_high) ) {
         BX_INFO("int13_harddisk: function %02x. LBA out of range\n",GET_AH());
         goto int13_fail;
         }
 
+      // Get 32 lsb lba and check
+      lba_low=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
+      if (lba_high == read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_high)
+          && lba_low >= read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_low) ) {
+        BX_INFO("int13_harddisk: function %02x. LBA out of range\n",GET_AH());
+        goto int13_fail;
+        }
+
       // If verify or seek
       if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
         goto int13_success;
-      
+
       // Execute the command
       if ( GET_AH() == 0x42 )
-        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, 0, 0, 0, lba, segment, offset);
+        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, 0, 0, 0, lba_low, lba_high, segment, offset);
       else
-        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 0, 0, 0, lba, segment, offset);
+        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 0, 0, 0, lba_low, lba_high, segment, offset);
 
       count=read_word(ebda_seg, &EbdaData->ata.trsfsectors);
       write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
@@ -5363,7 +5665,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
     case 0x49: // IBM/MS extended media change
       goto int13_success;    // Always success for HD
       break;
-      
+
     case 0x46: // IBM/MS eject media
       SET_AH(0xb2);          // Volume Not Removable
       goto int13_fail_noah;  // Always fail for HD
@@ -5373,7 +5675,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       size=read_word(DS,SI+(Bit16u)&Int13DPT->size);
 
       // Buffer is too small
-      if(size < 0x1a) 
+      if(size < 0x1a)
         goto int13_fail;
 
       // EDD 1.x
@@ -5383,17 +5685,26 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
         npc     = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.cylinders);
         nph     = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
         npspt   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
-        lba     = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors);
+        lba_low = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_low);
+        lba_high = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_high);
         blksize = read_word(ebda_seg, &EbdaData->ata.devices[device].blksize);
 
         write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1a);
-        write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x02); // geometry is valid
-        write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, (Bit32u)npc);
+        if (lba_high || (lba_low/npspt)/nph > 0x3fff)
+        {
+          write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x00); // geometry is invalid
+          write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, 0x3fff);
+        }
+        else
+        {
+          write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x02); // geometry is valid
+          write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, (Bit32u)npc);
+        }
         write_dword(DS, SI+(Bit16u)&Int13DPT->heads, (Bit32u)nph);
         write_dword(DS, SI+(Bit16u)&Int13DPT->spt, (Bit32u)npspt);
-        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, lba);  // FIXME should be Bit64
-        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0L);  
-        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);  
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, lba_low);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, lba_high);
+        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
         }
 
       // EDD 2.x
@@ -5403,8 +5714,8 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
 
         write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
 
-        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);  
-        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);  
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
 
         // Fill in dpte
         channel = device / 2;
@@ -5414,14 +5725,14 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
         mode = read_byte(ebda_seg, &EbdaData->ata.devices[device].mode);
         translation = read_byte(ebda_seg, &EbdaData->ata.devices[device].translation);
 
-        options  = (translation==ATA_TRANSLATION_NONE?0:1<<3); // chs translation
+        options  = (translation==ATA_TRANSLATION_NONE?0:1)<<3; // chs translation
         options |= (1<<4); // lba translation
-        options |= (mode==ATA_MODE_PIO32?1:0<<7);
-        options |= (translation==ATA_TRANSLATION_LBA?1:0<<9); 
-        options |= (translation==ATA_TRANSLATION_RECHS?3:0<<9); 
+        options |= (mode==ATA_MODE_PIO32?1:0)<<7;
+        options |= (translation==ATA_TRANSLATION_LBA?1:0)<<9;
+        options |= (translation==ATA_TRANSLATION_RECHS?3:0)<<9;
 
         write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
-        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
+        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2 + ATA_CB_DC);
         write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 2))<<4 );
         write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
         write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
@@ -5430,10 +5741,13 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
         write_byte(ebda_seg, &EbdaData->ata.dpte.pio, 0 );
         write_word(ebda_seg, &EbdaData->ata.dpte.options, options);
         write_word(ebda_seg, &EbdaData->ata.dpte.reserved, 0);
-        write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
- 
+        if (size >=0x42)
+          write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
+        else
+          write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x10);
+
         checksum=0;
-        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, (&EbdaData->ata.dpte) + i);
+        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, ((Bit8u*)(&EbdaData->ata.dpte)) + i);
         checksum = ~checksum;
         write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
         }
@@ -5459,7 +5773,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
           write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
           write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
           }
-        else { 
+        else {
           // FIXME PCI
           }
         write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
@@ -5472,7 +5786,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
           write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
           write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
           }
-        else { 
+        else {
           // FIXME PCI
           }
         write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
@@ -5508,7 +5822,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
     case 0x0d: /* alternate disk reset */
     case 0x11: /* recalibrate */
     case 0x14: /* controller internal diagnostic */
-      BX_INFO("int13h_harddisk function %02xh unimplemented, returns success\n", GET_AH());
+      BX_INFO("int13_harddisk: function %02xh unimplemented, returns success\n", GET_AH());
       goto int13_success;
       break;
 
@@ -5517,7 +5831,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
     case 0x18: // set media type for format
     case 0x50: // IBM/MS send packet command
     default:
-      BX_INFO("int13_harddisk function %02xh unsupported, returns fail\n", GET_AH());
+      BX_INFO("int13_harddisk: function %02xh unsupported, returns fail\n", GET_AH());
       goto int13_fail;
       break;
     }
@@ -5553,8 +5867,7 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
   Bit16u count, segment, offset, i, size;
 
   BX_DEBUG_INT13_CD("int13_cdrom: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
-  // BX_DEBUG_INT13_CD("int13_cdrom: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
-  
+
   SET_DISK_RET_STATUS(0x00);
 
   /* basic check : device should be 0xE0+ */
@@ -5571,16 +5884,16 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
     BX_INFO("int13_cdrom: function %02x, unmapped device for ELDL=%02x\n", GET_AH(), GET_ELDL());
     goto int13_fail;
     }
-  
+
   switch (GET_AH()) {
 
     // all those functions return SUCCESS
     case 0x00: /* disk controller reset */
     case 0x09: /* initialize drive parameters */
     case 0x0c: /* seek to specified cylinder */
-    case 0x0d: /* alternate disk reset */  
-    case 0x10: /* check drive ready */    
-    case 0x11: /* recalibrate */      
+    case 0x0d: /* alternate disk reset */
+    case 0x10: /* check drive ready */
+    case 0x11: /* recalibrate */
     case 0x14: /* controller internal diagnostic */
     case 0x16: /* detect disk change */
       goto int13_success;
@@ -5602,7 +5915,7 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
       /* set CF if error status read */
       if (status) goto int13_fail_nostatus;
       else        goto int13_success_noah;
-      break;      
+      break;
 
     case 0x15: /* read disk drive size */
       SET_AH(0x02);
@@ -5619,11 +5932,11 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
     case 0x42: // IBM/MS extended read
     case 0x44: // IBM/MS verify sectors
     case 0x47: // IBM/MS extended seek
-       
+
       count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
       segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
       offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
- 
+
       // Can't use 64 bits lba
       lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
       if (lba != 0L) {
@@ -5631,13 +5944,13 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
         goto int13_fail;
         }
 
-      // Get 32 bits lba 
+      // Get 32 bits lba
       lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
 
       // If verify or seek
       if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
         goto int13_success;
-      
+
       memsetb(get_SS(),atacmd,0,12);
       atacmd[0]=0x28;                      // READ command
       atacmd[7]=(count & 0xff00) >> 8;     // Sectors
@@ -5646,7 +5959,7 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
       atacmd[3]=(lba & 0x00ff0000) >> 16;
       atacmd[4]=(lba & 0x0000ff00) >> 8;
       atacmd[5]=(lba & 0x000000ff);
-      status = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, count*2048L, ATA_DATA_IN, segment,offset); 
+      status = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, count*2048L, ATA_DATA_IN, segment,offset);
 
       count = (Bit16u)(read_dword(ebda_seg, &EbdaData->ata.trsfbytes) >> 11);
       write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
@@ -5693,21 +6006,21 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
 
     case 0x46: // IBM/MS eject media
       locks = read_byte(ebda_seg, &EbdaData->ata.devices[device].lock);
-      
+
       if (locks != 0) {
         SET_AH(0xb1); // media locked
         goto int13_fail_noah;
         }
       // FIXME should handle 0x31 no media in device
       // FIXME should handle 0xb5 valid request failed
-    
+
       // Call removable media eject
       ASM_START
         push bp
         mov  bp, sp
 
         mov ah, #0x52
-        int 15
+        int #0x15
         mov _int13_cdrom.status + 2[bp], ah
         jnc int13_cdrom_rme_end
         mov _int13_cdrom.status, #1
@@ -5727,7 +6040,7 @@ int13_cdrom_rme_end:
       size = read_word(DS,SI+(Bit16u)&Int13Ext->size);
 
       // Buffer is too small
-      if(size < 0x1a) 
+      if(size < 0x1a)
         goto int13_fail;
 
       // EDD 1.x
@@ -5742,8 +6055,8 @@ int13_cdrom_rme_end:
         write_dword(DS, SI+(Bit16u)&Int13DPT->heads, 0xffffffff);
         write_dword(DS, SI+(Bit16u)&Int13DPT->spt, 0xffffffff);
         write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, 0xffffffff);  // FIXME should be Bit64
-        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0xffffffff);  
-        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);  
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0xffffffff);
+        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
         }
 
       // EDD 2.x
@@ -5753,8 +6066,8 @@ int13_cdrom_rme_end:
 
         write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
 
-        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);  
-        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);  
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
 
         // Fill in dpte
         channel = device / 2;
@@ -5770,7 +6083,7 @@ int13_cdrom_rme_end:
         options |= (mode==ATA_MODE_PIO32?1:0<<7);
 
         write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
-        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
+        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2 + ATA_CB_DC);
         write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 2))<<4 );
         write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
         write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
@@ -5782,7 +6095,7 @@ int13_cdrom_rme_end:
         write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
 
         checksum=0;
-        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, (&EbdaData->ata.dpte) + i);
+        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, ((Bit8u*)(&EbdaData->ata.dpte)) + i);
         checksum = ~checksum;
         write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
         }
@@ -5808,7 +6121,7 @@ int13_cdrom_rme_end:
           write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
           write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
           }
-        else { 
+        else {
           // FIXME PCI
           }
         write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
@@ -5821,7 +6134,7 @@ int13_cdrom_rme_end:
           write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
           write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
           }
-        else { 
+        else {
           // FIXME PCI
           }
         write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
@@ -5843,7 +6156,7 @@ int13_cdrom_rme_end:
       SET_AH(06);
       goto int13_fail_nostatus;
       break;
-      
+
     case 0x4e: // // IBM/MS set hardware configuration
       // DMA, prefetch, PIO maximum not supported
       switch (GET_AL()) {
@@ -5905,7 +6218,7 @@ int13_eltorito(DS, ES, DI, SI, BP, SP, B
 
   BX_DEBUG_INT13_ET("int13_eltorito: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
   // BX_DEBUG_INT13_ET("int13_eltorito: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
-  
+
   switch (GET_AH()) {
 
     // FIXME ElTorito Various. Should be implemented
@@ -5980,11 +6293,10 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
   Bit8u  atacmd[12];
 
   BX_DEBUG_INT13_ET("int13_cdemu: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
-  //BX_DEBUG_INT13_ET("int13_cdemu: SS=%04x ES=%04x DI=%04x SI=%04x\n", get_SS(), ES, DI, SI);
-  
+
   /* at this point, we are emulating a floppy/harddisk */
-  
-  // Recompute the device number 
+
+  // Recompute the device number
   device  = read_byte(ebda_seg,&EbdaData->cdemu.controller_index) * 2;
   device += read_byte(ebda_seg,&EbdaData->cdemu.device_spec);
 
@@ -5997,7 +6309,6 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
     goto int13_fail;
     }
 
-  
   switch (GET_AH()) {
 
     // all those functions return SUCCESS
@@ -6006,7 +6317,7 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
     case 0x0c: /* seek to specified cylinder */
     case 0x0d: /* alternate disk reset */  // FIXME ElTorito Various. should really reset ?
     case 0x10: /* check drive ready */     // FIXME ElTorito Various. should check if ready ?
-    case 0x11: /* recalibrate */      
+    case 0x11: /* recalibrate */
     case 0x14: /* controller internal diagnostic */
     case 0x16: /* detect disk change */
       goto int13_success;
@@ -6031,9 +6342,9 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
 
     case 0x02: // read disk sectors
     case 0x04: // verify disk sectors
-      vspt       = read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt); 
-      vcylinders = read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders); 
-      vheads     = read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads); 
+      vspt       = read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
+      vcylinders = read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders);
+      vheads     = read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads);
 
       ilba       = read_dword(ebda_seg,&EbdaData->cdemu.ilba);
 
@@ -6062,17 +6373,17 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
 
       // calculate the virtual lba inside the image
       vlba=((((Bit32u)cylinder*(Bit32u)vheads)+(Bit32u)head)*(Bit32u)vspt)+((Bit32u)(sector-1));
- 
+
       // In advance so we don't loose the count
       SET_AL(nbsectors);
 
       // start lba on cd
-      slba  = (Bit32u)vlba/4; 
+      slba  = (Bit32u)vlba/4;
       before= (Bit16u)vlba%4;
 
       // end lba on cd
       elba = (Bit32u)(vlba+nbsectors-1)/4;
-      
+
       memsetb(get_SS(),atacmd,0,12);
       atacmd[0]=0x28;                      // READ command
       atacmd[7]=((Bit16u)(elba-slba+1) & 0xff00) >> 8; // Sectors
@@ -6092,10 +6403,10 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
       break;
 
     case 0x08: /* read disk drive parameters */
-      vspt=read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt); 
-      vcylinders=read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders) - 1; 
-      vheads=read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads) - 1; 
- 
+      vspt=read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
+      vcylinders=read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders) - 1;
+      vheads=read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads) - 1;
+
       SET_AL( 0x00 );
       SET_BL( 0x00 );
       SET_CH( vcylinders & 0xff );
@@ -6103,7 +6414,7 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
       SET_DH( vheads );
       SET_DL( 0x02 );   // FIXME ElTorito Various. should send the real count of drives 1 or 2
                         // FIXME ElTorito Harddisk. should send the HD count
- 
+
       switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
         case 0x01: SET_BL( 0x02 ); break;
         case 0x02: SET_BL( 0x04 ); break;
@@ -6139,7 +6450,7 @@ ASM_END
     case 0x45: // IBM/MS lock/unlock drive
     case 0x46: // IBM/MS eject media
     case 0x47: // IBM/MS extended seek
-    case 0x48: // IBM/MS get drive parameters 
+    case 0x48: // IBM/MS get drive parameters
     case 0x49: // IBM/MS extended media change
     case 0x4e: // ? - set hardware configuration
     case 0x50: // ? - send packet command
@@ -6227,8 +6538,8 @@ ASM_END
 }
 
   void
-int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
-  Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+int13_harddisk(EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+  Bit16u EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
 {
   Bit8u    drive, num_sectors, sector, head, status, mod;
   Bit8u    drive_map;
@@ -6334,7 +6645,7 @@ BX_DEBUG_INT13_HD("int13_f01\n");
         }
 
       if ( (num_sectors > 128) || (num_sectors == 0) )
-        BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
+        BX_PANIC("int13_harddisk: num_sectors out of range!\n");
 
       if (head > 15)
         BX_PANIC("hard drive BIOS:(read/verify) head > 15\n");
@@ -6480,7 +6791,7 @@ BX_DEBUG_INT13_HD("int13_f03\n");
         }
 
       if ( (num_sectors > 128) || (num_sectors == 0) )
-        BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
+        BX_PANIC("int13_harddisk: num_sectors out of range!\n");
 
       if (head > 15)
         BX_PANIC("hard drive BIOS:(read) head > 15\n");
@@ -6590,7 +6901,7 @@ BX_DEBUG_INT13_HD("int13_f05\n");
 
     case 0x08: /* read disk drive parameters */
 BX_DEBUG_INT13_HD("int13_f08\n");
-      
+
       drive = GET_ELDL ();
       get_hd_geometry(drive, &hd_cylinders, &hd_heads, &hd_sectors);
 
@@ -6730,10 +7041,10 @@ ASM_END
       break;
 
     case 0x18: // set media type for format
-    case 0x41: // IBM/MS 
-    case 0x42: // IBM/MS 
-    case 0x43: // IBM/MS 
-    case 0x44: // IBM/MS 
+    case 0x41: // IBM/MS
+    case 0x42: // IBM/MS
+    case 0x43: // IBM/MS
+    case 0x44: // IBM/MS
     case 0x45: // IBM/MS lock/unlock drive
     case 0x46: // IBM/MS eject media
     case 0x47: // IBM/MS extended seek
@@ -6778,7 +7089,7 @@ get_hd_geometry(drive, hd_cylinders, hd_
     hd_type = inb_cmos(0x12) & 0x0f;
     if (hd_type != 0x0f)
       BX_INFO(panic_msg_reg12h,1);
-    hd_type = inb_cmos(0x1a); // HD0: extended type
+    hd_type = inb_cmos(0x1a); // HD1: extended type
     if (hd_type != 47)
       BX_INFO(panic_msg_reg19h,0,0x1a);
     iobase = 0x24;
@@ -6797,10 +7108,71 @@ get_hd_geometry(drive, hd_cylinders, hd_
 
 #endif //else BX_USE_ATADRV
 
+#if BX_SUPPORT_FLOPPY
 
 //////////////////////
 // FLOPPY functions //
 //////////////////////
+
+void floppy_reset_controller()
+{
+  Bit8u val8;
+
+  // Reset controller
+  val8 = inb(0x03f2);
+  outb(0x03f2, val8 & ~0x04);
+  outb(0x03f2, val8 | 0x04);
+
+  // Wait for controller to come out of reset
+  do {
+    val8 = inb(0x3f4);
+  } while ( (val8 & 0xc0) != 0x80 );
+}
+
+void floppy_prepare_controller(drive)
+  Bit16u drive;
+{
+  Bit8u  val8, dor, prev_reset;
+
+  // set 40:3e bit 7 to 0
+  val8 = read_byte(0x0040, 0x003e);
+  val8 &= 0x7f;
+  write_byte(0x0040, 0x003e, val8);
+
+  // turn on motor of selected drive, DMA & int enabled, normal operation
+  prev_reset = inb(0x03f2) & 0x04;
+  if (drive)
+    dor = 0x20;
+  else
+    dor = 0x10;
+  dor |= 0x0c;
+  dor |= drive;
+  outb(0x03f2, dor);
+
+  // reset the disk motor timeout value of INT 08
+  write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
+
+  // wait for drive readiness
+  do {
+    val8 = inb(0x3f4);
+  } while ( (val8 & 0xc0) != 0x80 );
+
+  if (prev_reset == 0) {
+    // turn on interrupts
+ASM_START
+    sti
+ASM_END
+    // wait on 40:3e bit 7 to become 1
+    do {
+      val8 = read_byte(0x0040, 0x003e);
+    } while ( (val8 & 0x80) == 0 );
+    val8 &= 0x7f;
+ASM_START
+    cli
+ASM_END
+    write_byte(0x0040, 0x003e, val8);
+  }
+}
 
   bx_bool
 floppy_media_known(drive)
@@ -6908,7 +7280,7 @@ floppy_media_sense(drive)
     retval = 1;
     }
   //
-  // Extended floppy size uses special cmos setting 
+  // Extended floppy size uses special cmos setting
   else if ( drive_type == 6 ) {
     // 160k 5.25" drive
     config_data = 0x00; // 0000 0000
@@ -6949,63 +7321,41 @@ floppy_drive_recal(drive)
 floppy_drive_recal(drive)
   Bit16u drive;
 {
-  Bit8u  val8, dor;
+  Bit8u  val8;
   Bit16u curr_cyl_offset;
 
-  // set 40:3e bit 7 to 0
-  val8 = read_byte(0x0000, 0x043e);
-  val8 &= 0x7f;
-  write_byte(0x0000, 0x043e, val8);
-
-  // turn on motor of selected drive, DMA & int enabled, normal operation
-  if (drive)
-    dor = 0x20;
-  else
-    dor = 0x10;
-  dor |= 0x0c;
-  dor |= drive;
-  outb(0x03f2, dor);
-
-  // reset the disk motor timeout value of INT 08
-  write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
-  // check port 3f4 for drive readiness
-  val8 = inb(0x3f4);
-  if ( (val8 & 0xf0) != 0x80 )
-    BX_PANIC("floppy recal:f07: ctrl not ready\n");
+  floppy_prepare_controller(drive);
 
   // send Recalibrate command (2 bytes) to controller
   outb(0x03f5, 0x07);  // 07: Recalibrate
   outb(0x03f5, drive); // 0=drive0, 1=drive1
 
- // turn on interrupts
+  // turn on interrupts
 ASM_START
   sti
 ASM_END
 
   // wait on 40:3e bit 7 to become 1
-  val8 = (read_byte(0x0000, 0x043e) & 0x80);
-  while ( val8 == 0 ) {
-    val8 = (read_byte(0x0000, 0x043e) & 0x80);
-    }
-
- val8 = 0; // separate asm from while() loop
- // turn off interrupts
+  do {
+    val8 = (read_byte(0x0040, 0x003e) & 0x80);
+  } while ( val8 == 0 );
+
+  val8 = 0; // separate asm from while() loop
+  // turn off interrupts
 ASM_START
   cli
 ASM_END
 
   // set 40:3e bit 7 to 0, and calibrated bit
-  val8 = read_byte(0x0000, 0x043e);
+  val8 = read_byte(0x0040, 0x003e);
   val8 &= 0x7f;
   if (drive) {
     val8 |= 0x02; // Drive 1 calibrated
     curr_cyl_offset = 0x0095;
-    }
-  else {
+  } else {
     val8 |= 0x01; // Drive 0 calibrated
     curr_cyl_offset = 0x0094;
-    }
+  }
   write_byte(0x0040, 0x003e, val8);
   write_byte(0x0040, curr_cyl_offset, 0); // current cylinder is 0
 
@@ -7032,7 +7382,6 @@ floppy_drive_exists(drive)
     return(1);
 }
 
-#if BX_SUPPORT_FLOPPY
   void
 int13_diskette_function(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
   Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
@@ -7045,7 +7394,6 @@ int13_diskette_function(DS, ES, DI, SI, 
   Bit16u es, last_addr;
 
   BX_DEBUG_INT13_FL("int13_diskette: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
-  // BX_DEBUG_INT13_FL("int13_diskette: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), get_DS(), ES, DI, SI);
 
   ah = GET_AH();
 
@@ -7058,7 +7406,7 @@ BX_DEBUG_INT13_FL("floppy f00\n");
         set_diskette_ret_status(1);
         SET_CF();
         return;
-        }
+      }
       drive_type = inb_cmos(0x10);
 
       if (drive == 0)
@@ -7070,7 +7418,7 @@ BX_DEBUG_INT13_FL("floppy f00\n");
         set_diskette_ret_status(0x80);
         SET_CF();
         return;
-        }
+      }
       SET_AH(0);
       set_diskette_ret_status(0);
       CLEAR_CF(); // successful
@@ -7083,7 +7431,7 @@ BX_DEBUG_INT13_FL("floppy f00\n");
       SET_AH(val8);
       if (val8) {
         SET_CF();
-        }
+      }
       return;
 
     case 0x02: // Read Diskette Sectors
@@ -7095,15 +7443,15 @@ BX_DEBUG_INT13_FL("floppy f00\n");
       head        = GET_DH();
       drive       = GET_ELDL();
 
-      if ( (drive > 1) || (head > 1) ||
-           (num_sectors == 0) || (num_sectors > 72) ) {
-BX_INFO("floppy: drive>1 || head>1 ...\n");
+      if ((drive > 1) || (head > 1) || (sector == 0) ||
+          (num_sectors == 0) || (num_sectors > 72)) {
+        BX_INFO("int13_diskette: read/write/verify: parameter out of range\n");
         SET_AH(1);
         set_diskette_ret_status(1);
         SET_AL(0); // no sectors read
         SET_CF(); // error occurred
         return;
-        }
+      }
 
       // see if drive exists
       if (floppy_drive_exists(drive) == 0) {
@@ -7112,7 +7460,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         SET_AL(0); // no sectors read
         SET_CF(); // error occurred
         return;
-        }
+      }
 
       // see if media in drive, and type is known
       if (floppy_media_known(drive) == 0) {
@@ -7122,8 +7470,8 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
         }
+      }
 
       if (ah == 0x02) {
         // Read Diskette Sectors
@@ -7142,7 +7490,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         if ( base_address < base_es ) {
           // in case of carry, adjust page by 1
           page++;
-          }
+        }
         base_count = (num_sectors * 512) - 1;
 
         // check for 64K boundary overrun
@@ -7153,7 +7501,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
+        }
 
         BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
         outb(0x000a, 0x06);
@@ -7186,28 +7534,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         //--------------------------------------
         // set up floppy controller for transfer
         //--------------------------------------
-
-        // set 40:3e bit 7 to 0
-        val8 = read_byte(0x0000, 0x043e);
-        val8 &= 0x7f;
-        write_byte(0x0000, 0x043e, val8);
-
-        // turn on motor of selected drive, DMA & int enabled, normal operation
-        if (drive)
-          dor = 0x20;
-        else
-          dor = 0x10;
-        dor |= 0x0c;
-        dor |= drive;
-        outb(0x03f2, dor);
-
-        // reset the disk motor timeout value of INT 08
-        write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
-        // check port 3f4 for drive readiness
-        val8 = inb(0x3f4);
-        if ( (val8 & 0xf0) != 0x80 )
-          BX_PANIC("int13_diskette:f02: ctrl not ready\n");
+        floppy_prepare_controller(drive);
 
         // send read-normal-data command (9 bytes) to controller
         outb(0x03f5, 0xe6); // e6: read normal data
@@ -7220,27 +7547,35 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         outb(0x03f5, 0); // Gap length
         outb(0x03f5, 0xff); // Gap length
 
-       // turn on interrupts
+        // turn on interrupts
   ASM_START
         sti
   ASM_END
 
         // wait on 40:3e bit 7 to become 1
-        val8 = (read_byte(0x0000, 0x043e) & 0x80);
-        while ( val8 == 0 ) {
-          val8 = (read_byte(0x0000, 0x043e) & 0x80);
+        do {
+          val8 = read_byte(0x0040, 0x0040);
+          if (val8 == 0) {
+            floppy_reset_controller();
+            SET_AH(0x80); // drive not ready (timeout)
+            set_diskette_ret_status(0x80);
+            SET_AL(0); // no sectors read
+            SET_CF(); // error occurred
+            return;
           }
-
-       val8 = 0; // separate asm from while() loop
-       // turn off interrupts
+          val8 = (read_byte(0x0040, 0x003e) & 0x80);
+        } while ( val8 == 0 );
+
+        val8 = 0; // separate asm from while() loop
+        // turn off interrupts
   ASM_START
         cli
   ASM_END
 
         // set 40:3e bit 7 to 0
-        val8 = read_byte(0x0000, 0x043e);
+        val8 = read_byte(0x0040, 0x003e);
         val8 &= 0x7f;
-        write_byte(0x0000, 0x043e, val8);
+        write_byte(0x0040, 0x003e, val8);
 
         // check port 3f4 for accessibility to status bytes
         val8 = inb(0x3f4);
@@ -7271,7 +7606,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
+        }
 
         // ??? should track be new val from return_status[3] ?
         set_diskette_current_cyl(drive, track);
@@ -7279,8 +7614,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         SET_AH(0x00); // success
         CLEAR_CF();   // success
         return;
-        }
-      else if (ah == 0x03) {
+      } else if (ah == 0x03) {
         // Write Diskette Sectors
 
         //-----------------------------------
@@ -7297,7 +7631,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         if ( base_address < base_es ) {
           // in case of carry, adjust page by 1
           page++;
-          }
+        }
         base_count = (num_sectors * 512) - 1;
 
         // check for 64K boundary overrun
@@ -7308,7 +7642,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
+        }
 
         BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
         outb(0x000a, 0x06);
@@ -7334,30 +7668,9 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         //--------------------------------------
         // set up floppy controller for transfer
         //--------------------------------------
-
-        // set 40:3e bit 7 to 0
-        val8 = read_byte(0x0000, 0x043e);
-        val8 &= 0x7f;
-        write_byte(0x0000, 0x043e, val8);
-
-        // turn on motor of selected drive, DMA & int enabled, normal operation
-        if (drive)
-          dor = 0x20;
-        else
-          dor = 0x10;
-        dor |= 0x0c;
-        dor |= drive;
-        outb(0x03f2, dor);
-
-        // reset the disk motor timeout value of INT 08
-        write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
-        // check port 3f4 for drive readiness
-        val8 = inb(0x3f4);
-        if ( (val8 & 0xf0) != 0x80 )
-          BX_PANIC("int13_diskette:f03: ctrl not ready\n");
-
-        // send read-normal-data command (9 bytes) to controller
+        floppy_prepare_controller(drive);
+
+        // send write-normal-data command (9 bytes) to controller
         outb(0x03f5, 0xc5); // c5: write normal data
         outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
         outb(0x03f5, track);
@@ -7368,27 +7681,35 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         outb(0x03f5, 0); // Gap length
         outb(0x03f5, 0xff); // Gap length
 
-       // turn on interrupts
+        // turn on interrupts
   ASM_START
         sti
   ASM_END
 
         // wait on 40:3e bit 7 to become 1
-        val8 = (read_byte(0x0000, 0x043e) & 0x80);
-        while ( val8 == 0 ) {
-          val8 = (read_byte(0x0000, 0x043e) & 0x80);
+        do {
+          val8 = read_byte(0x0040, 0x0040);
+          if (val8 == 0) {
+            floppy_reset_controller();
+            SET_AH(0x80); // drive not ready (timeout)
+            set_diskette_ret_status(0x80);
+            SET_AL(0); // no sectors written
+            SET_CF(); // error occurred
+            return;
           }
-
-       val8 = 0; // separate asm from while() loop
-       // turn off interrupts
+          val8 = (read_byte(0x0040, 0x003e) & 0x80);
+        } while ( val8 == 0 );
+
+        val8 = 0; // separate asm from while() loop
+        // turn off interrupts
   ASM_START
         cli
   ASM_END
 
         // set 40:3e bit 7 to 0
-        val8 = read_byte(0x0000, 0x043e);
+        val8 = read_byte(0x0040, 0x003e);
         val8 &= 0x7f;
-        write_byte(0x0000, 0x043e, val8);
+        write_byte(0x0040, 0x003e, val8);
 
         // check port 3f4 for accessibility to status bytes
         val8 = inb(0x3f4);
@@ -7432,8 +7753,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         SET_AH(0x00); // success
         CLEAR_CF();   // success
         return;
-        }
-      else {  // if (ah == 0x04)
+      } else {  // if (ah == 0x04)
         // Verify Diskette Sectors
 
         // ??? should track be new val from return_status[3] ?
@@ -7442,8 +7762,8 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         CLEAR_CF();   // success
         SET_AH(0x00); // success
         return;
-        }
-
+      }
+      break;
 
     case 0x05: // format diskette track
 BX_DEBUG_INT13_FL("floppy f05\n");
@@ -7458,7 +7778,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
         SET_AH(1);
         set_diskette_ret_status(1);
         SET_CF(); // error occurred
-        }
+      }
 
       // see if drive exists
       if (floppy_drive_exists(drive) == 0) {
@@ -7466,7 +7786,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
         set_diskette_ret_status(0x80);
         SET_CF(); // error occurred
         return;
-        }
+      }
 
       // see if media in drive, and type is known
       if (floppy_media_known(drive) == 0) {
@@ -7476,8 +7796,8 @@ BX_DEBUG_INT13_FL("floppy f05\n");
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
         }
+      }
 
       // set up DMA controller for transfer
       page = (ES >> 12);   // upper 4 bits
@@ -7487,7 +7807,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
       if ( base_address < base_es ) {
         // in case of carry, adjust page by 1
         page++;
-        }
+      }
       base_count = (num_sectors * 4) - 1;
 
       // check for 64K boundary overrun
@@ -7498,7 +7818,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
         SET_AL(0); // no sectors read
         SET_CF(); // error occurred
         return;
-        }
+      }
 
       outb(0x000a, 0x06);
       outb(0x000c, 0x00); // clear flip-flop
@@ -7515,27 +7835,9 @@ BX_DEBUG_INT13_FL("floppy f05\n");
       outb(0x000a, 0x02);
 
       // set up floppy controller for transfer
-      val8 = read_byte(0x0000, 0x043e);
-      val8 &= 0x7f;
-      write_byte(0x0000, 0x043e, val8);
-      // turn on motor of selected drive, DMA & int enabled, normal operation
-      if (drive)
-        dor = 0x20;
-      else
-        dor = 0x10;
-      dor |= 0x0c;
-      dor |= drive;
-      outb(0x03f2, dor);
-
-      // reset the disk motor timeout value of INT 08
-      write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
-      // check port 3f4 for drive readiness
-      val8 = inb(0x3f4);
-      if ( (val8 & 0xf0) != 0x80 )
-        BX_PANIC("int13_diskette:f05: ctrl not ready\n");
-
-      // send read-normal-data command (6 bytes) to controller
+      floppy_prepare_controller(drive);
+
+      // send format-track command (6 bytes) to controller
       outb(0x03f5, 0x4d); // 4d: format track
       outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
       outb(0x03f5, 2); // 512 byte sector size
@@ -7546,20 +7848,29 @@ BX_DEBUG_INT13_FL("floppy f05\n");
   ASM_START
       sti
   ASM_END
+
       // wait on 40:3e bit 7 to become 1
-      val8 = (read_byte(0x0000, 0x043e) & 0x80);
-      while ( val8 == 0 ) {
-        val8 = (read_byte(0x0000, 0x043e) & 0x80);
+      do {
+        val8 = read_byte(0x0040, 0x0040);
+        if (val8 == 0) {
+          floppy_reset_controller();
+          SET_AH(0x80); // drive not ready (timeout)
+          set_diskette_ret_status(0x80);
+          SET_CF(); // error occurred
+          return;
         }
-     val8 = 0; // separate asm from while() loop
-     // turn off interrupts
+        val8 = (read_byte(0x0040, 0x003e) & 0x80);
+      } while ( val8 == 0 );
+
+      val8 = 0; // separate asm from while() loop
+      // turn off interrupts
   ASM_START
       cli
   ASM_END
       // set 40:3e bit 7 to 0
-      val8 = read_byte(0x0000, 0x043e);
+      val8 = read_byte(0x0040, 0x003e);
       val8 &= 0x7f;
-      write_byte(0x0000, 0x043e, val8);
+      write_byte(0x0040, 0x003e, val8);
       // check port 3f4 for accessibility to status bytes
       val8 = inb(0x3f4);
       if ( (val8 & 0xc0) != 0xc0 )
@@ -7911,8 +8222,9 @@ Bit16u seq_nr;
   Bit16u bootseg;
   Bit16u bootip;
   Bit16u status;
-
-  struct ipl_entry e;
+  Bit16u bootfirst;
+
+  ipl_entry_t e;
 
   // if BX_ELTORITO_BOOT is not defined, old behavior
   //   check bit 5 in CMOS reg 0x2d.  load either 0x00 or 0x80 into DL
@@ -7926,7 +8238,7 @@ Bit16u seq_nr;
   //     CMOS reg 0x38 & 0xf0 : 3rd boot device
   //   boot device codes:
   //     0x00 : not defined
-  //     0x01 : first floppy 
+  //     0x01 : first floppy
   //     0x02 : first harddrive
   //     0x03 : first cdrom
   //     0x04 - 0x0f : PnP expansion ROMs (e.g. Etherboot)
@@ -7938,16 +8250,25 @@ Bit16u seq_nr;
   bootdev |= ((inb_cmos(0x38) & 0xf0) << 4);
   bootdev >>= 4 * seq_nr;
   bootdev &= 0xf;
-  if (bootdev == 0) BX_PANIC("No bootable device.\n");
-  
+
+  /* Read user selected device */
+  bootfirst = read_word(ebda_seg, IPL_BOOTFIRST_OFFSET);
+  if (bootfirst != 0xFFFF) {
+    bootdev = bootfirst;
+    /* User selected device not set */
+    write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, 0xFFFF);
+    /* Reset boot sequence */
+    write_word(ebda_seg, IPL_SEQUENCE_OFFSET, 0xFFFF);
+  } else if (bootdev == 0) BX_PANIC("No bootable device.\n");
+
   /* Translate from CMOS runes to an IPL table offset by subtracting 1 */
   bootdev -= 1;
-#else  
+#else
   if (seq_nr ==2) BX_PANIC("No more boot devices.");
-  if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1)) 
+  if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1))
       /* Boot from floppy if the bit is set or it's the second boot */
     bootdev = 0x00;
-  else 
+  else
     bootdev = 0x01;
 #endif
 
@@ -7959,13 +8280,13 @@ Bit16u seq_nr;
 
   /* Do the loading, and set up vector as a far pointer to the boot
    * address, and bootdrv as the boot drive */
-  print_boot_device(e.type);
+  print_boot_device(&e);
 
   switch(e.type) {
-  case 0x01: /* FDD */
-  case 0x02: /* HDD */
-
-    bootdrv = (e.type == 0x02) ? 0x80 : 0x00;
+  case IPL_TYPE_FLOPPY: /* FDD */
+  case IPL_TYPE_HARDDISK: /* HDD */
+
+    bootdrv = (e.type == IPL_TYPE_HARDDISK) ? 0x80 : 0x00;
     bootseg = 0x07c0;
     status = 0;
 
@@ -7980,7 +8301,7 @@ ASM_START
     mov  dl, _int18_function.bootdrv + 2[bp]
     mov  ax, _int18_function.bootseg + 2[bp]
     mov  es, ax         ;; segment
-    mov  bx, #0x0000    ;; offset
+    xor  bx, bx         ;; offset
     mov  ah, #0x02      ;; function 2, read diskette sector
     mov  al, #0x01      ;; read 1 sector
     mov  ch, #0x00      ;; track 0
@@ -7998,7 +8319,7 @@ int19_load_done:
     pop  ax
     pop  bp
 ASM_END
-    
+
     if (status != 0) {
       print_boot_failure(e.type, 1);
       return;
@@ -8006,7 +8327,7 @@ ASM_END
 
     /* Always check the signature on a HDD boot sector; on FDD, only do
      * the check if the CMOS doesn't tell us to skip it */
-    if (e.type != 0x00 || !((inb_cmos(0x38) & 0x01))) {
+    if ((e.type != IPL_TYPE_FLOPPY) || !((inb_cmos(0x38) & 0x01))) {
       if (read_word(bootseg,0x1fe) != 0xaa55) {
         print_boot_failure(e.type, 0);
         return;
@@ -8024,7 +8345,7 @@ ASM_END
   break;
 
 #if BX_ELTORITO_BOOT
-  case 0x03: /* CD-ROM */
+  case IPL_TYPE_CDROM: /* CD-ROM */
     status = cdrom_boot();
 
     // If failure
@@ -8043,7 +8364,7 @@ ASM_END
     break;
 #endif
 
-  case 0x80: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */
+  case IPL_TYPE_BEV: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */
     bootseg = e.vector >> 16;
     bootip = e.vector & 0xffff;
     break;
@@ -8051,16 +8372,20 @@ ASM_END
   default: return;
   }
 
-  
+  /* Debugging info */
+  BX_INFO("Booting from %x:%x\n", bootseg, bootip);
+
   /* Jump to the boot vector */
 ASM_START
     mov  bp, sp
+//    push cs
+//    push #int18_handler
     ;; Build an iret stack frame that will take us to the boot vector.
     ;; iret pops ip, then cs, then flags, so push them in the opposite order.
     pushf
-    mov  ax, _int18_function.bootseg + 0[bp] 
+    mov  ax, _int18_function.bootseg + 0[bp]
     push ax
-    mov  ax, _int18_function.bootip + 0[bp] 
+    mov  ax, _int18_function.bootip + 0[bp]
     push ax
     ;; Set the magic number in ax and the boot drive in dl.
     mov  ax, #0xaa55
@@ -8263,7 +8588,11 @@ int1a_function(regs, ds, iret_addr)
       } else if (regs.u.r8.bl == 0x83) {
         BX_INFO("bad PCI vendor ID %04x\n", regs.u.r16.dx);
       } else if (regs.u.r8.bl == 0x86) {
-        BX_INFO("PCI device %04x:%04x not found\n", regs.u.r16.dx, regs.u.r16.cx);
+        if (regs.u.r8.al == 0x02) {
+          BX_INFO("PCI device %04x:%04x not found at index %d\n", regs.u.r16.dx, regs.u.r16.cx, regs.u.r16.si);
+        } else {
+          BX_INFO("no PCI device with class code 0x%02x%04x found at index %d\n", regs.u.r8.cl, regs.u.r16.dx, regs.u.r16.si);
+        }
       }
       regs.u.r8.ah = regs.u.r8.bl;
       SetCF(iret_addr.flags);
@@ -8309,11 +8638,11 @@ ASM_END
           // Done waiting.
           Bit16u segment, offset;
 
-          offset = read_word( 0x40, 0x98 );
-          segment = read_word( 0x40, 0x9A );
+          segment = read_word( 0x40, 0x98 );
+          offset = read_word( 0x40, 0x9A );
           write_byte( 0x40, 0xA0, 0 );  // Turn of status byte.
           outb_cmos( 0xB, registerB & 0x37 ); // Clear the Periodic Interrupt.
-          write_byte( segment, offset, 0x80 );  // Write to specified flag byte.
+          write_byte(segment, offset, read_byte(segment, offset) | 0x80 );  // Write to specified flag byte.
         } else {
           // Continue waiting.
           time -= 0x3D1;
@@ -8521,13 +8850,18 @@ int13_notcdrom:
 #endif
 
 int13_disk:
+  ;; int13_harddisk modifies high word of EAX
+  shr   eax, #16
+  push  ax
   call  _int13_harddisk
+  pop   ax
+  shl   eax, #16
 
 int13_out:
   pop ds
   pop es
   popa
-  iret 
+  iret
 
 ;----------
 ;- INT18h -
@@ -8540,18 +8874,19 @@ int18_handler: ;; Boot Failure recovery:
   xor  ax, ax
   mov  ss, ax
 
-  ;; Get the boot sequence number out of the IPL memory
   ;; The first time we do this it will have been set to -1 so 
   ;; we will start from device 0.
-  mov  bx, #IPL_SEG 
+  mov  ds, ax
+  mov  bx, word ptr [0x40E]       ;; EBDA segment
   mov  ds, bx                     ;; Set segment
   mov  bx, IPL_SEQUENCE_OFFSET    ;; BX is now the sequence number
   inc  bx                         ;; ++
   mov  IPL_SEQUENCE_OFFSET, bx    ;; Write it back
-  mov  ds, ax                     ;; and reset the segment to zero. 
+  mov  ds, ax                     ;; and reset the segment to zero.
 
   ;; Call the C code for the next boot device
   push bx
+
   call _int18_function
 
   ;; Boot failed: invoke the boot recovery function...
@@ -8561,6 +8896,7 @@ int18_handler: ;; Boot Failure recovery:
 ;- INT19h -
 ;----------
 int19_relocated: ;; Boot function, relocated
+
   ;;
   ;; *** Warning: INT 19h resets the whole machine *** 
   ;;
@@ -8572,10 +8908,12 @@ int19_relocated: ;; Boot function, reloc
   ;; boot sequence will start, which is more or less the required behaviour.
   ;; 
   ;; Reset SP and SS
+
   mov  ax, #0xfffe
   mov  sp, ax
   xor  ax, ax
   mov  ss, ax
+
   call _machine_reset
 
 ;----------
@@ -8589,7 +8927,7 @@ int1c_handler: ;; User Timer Tick
 ;- POST: Floppy Drive -
 ;----------------------
 floppy_drive_post:
-  mov  ax, #0x0000
+  xor  ax, ax
   mov  ds, ax
 
   mov  al, #0x00
@@ -8671,7 +9009,7 @@ hard_drive_post:
   mov  dx, #0x03f6
   out  dx, al
 
-  mov  ax, #0x0000
+  xor  ax, ax
   mov  ds, ax
   mov  0x0474, al /* hard disk status of last operation */
   mov  0x0477, al /* hard disk port offset (XT only ???) */
@@ -8686,8 +9024,8 @@ hard_drive_post:
   SET_INT_VECTOR(0x76, #0xF000, #int76_handler)
   ;; INT 41h: hard disk 0 configuration pointer
   ;; INT 46h: hard disk 1 configuration pointer
-  SET_INT_VECTOR(0x41, #EBDA_SEG, #0x003D)
-  SET_INT_VECTOR(0x46, #EBDA_SEG, #0x004D)
+  SET_INT_VECTOR(0x41, word ptr [0x40E], #0x003D) /* EBDA:003D */
+  SET_INT_VECTOR(0x46, word ptr [0x40E], #0x004D) /* EBDA:004D */
 
   ;; move disk geometry data from CMOS to EBDA disk parameter table(s)
   mov  al, #0x12
@@ -8716,7 +9054,9 @@ post_d0_type47:
   ;; 22    landing zone high        D
   ;; 23    sectors/track            E
 
-  mov  ax, #EBDA_SEG
+  xor  ax, ax
+  mov  ds, ax
+  mov  ax, word ptr [0x40E] ;; EBDA segment
   mov  ds, ax
 
   ;;; Filling EBDA table for hard disk 0.
@@ -8862,7 +9202,9 @@ post_d1_type47:
   ;; 0x2b    landing zone high        D
   ;; 0x2c    sectors/track            E
 ;;; Fill EBDA table for hard disk 1.
-  mov  ax, #EBDA_SEG
+  xor  ax, ax
+  mov  ds, ax
+  mov  ax, word ptr [0x40E] ;; EBDA segment
   mov  ds, ax
   mov  al, #0x28
   out  #0x70, al
@@ -8993,13 +9335,42 @@ ebda_post:
 ;--------------------
 ; relocated here because the primary POST area isnt big enough.
 eoi_jmp_post:
-  call eoi_both_pics
-
+  mov   al, #0x20
+  out   #0xA0, al ;; slave  PIC EOI
+  mov   al, #0x20
+  out   #0x20, al ;; master PIC EOI
+
+jmp_post_0x467:
   xor ax, ax
   mov ds, ax
 
   jmp far ptr [0x467]
 
+iret_post_0x467:
+  xor ax, ax
+  mov ds, ax
+
+  mov sp, [0x467]
+  mov ss, [0x469]
+  iret
+
+retf_post_0x467:
+  xor ax, ax
+  mov ds, ax
+
+  mov sp, [0x467]
+  mov ss, [0x469]
+  retf
+
+s3_post:
+#if BX_ROMBIOS32
+  call rombios32_init
+#endif
+  call _s3_resume
+  mov bl, #0x00
+  and ax, ax
+  jz normal_post
+  call _s3_resume_panic
 
 ;--------------------
 eoi_both_pics:
@@ -9152,16 +9523,22 @@ bios32_structure:
 
 .align 16
 bios32_entry_point:
-  pushf
-  cmp eax, #0x49435024
+  pushfd
+  cmp eax, #0x49435024 ;; "$PCI"
   jne unknown_service
   mov eax, #0x80000000
   mov dx, #0x0cf8
   out dx, eax
   mov dx, #0x0cfc
   in  eax, dx
-  cmp eax, #0x12378086
+#ifdef PCI_FIXED_HOST_BRIDGE
+  cmp eax, #PCI_FIXED_HOST_BRIDGE
   jne unknown_service
+#else
+  ;; say ok if a device is present
+  cmp eax, #0xffffffff
+  je unknown_service
+#endif
   mov ebx, #0x000f0000
   mov ecx, #0
   mov edx, #pcibios_protected
@@ -9170,12 +9547,15 @@ unknown_service:
 unknown_service:
   mov al, #0x80
 bios32_end:
-  popf
+#ifdef BX_QEMU
+  and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
+#endif
+  popfd
   retf
 
 .align 16
 pcibios_protected:
-  pushf
+  pushfd
   cli
   push esi
   push edi
@@ -9183,15 +9563,15 @@ pcibios_protected:
   jne pci_pro_f02
   mov bx, #0x0210
   mov cx, #0
-  mov edx, #0x20494350
+  mov edx, #0x20494350 ;; "PCI "
   mov al, #0x01
   jmp pci_pro_ok
 pci_pro_f02: ;; find pci device
   cmp al, #0x02
-  jne pci_pro_f08
+  jne pci_pro_f03
   shl ecx, #16
   mov cx, dx
-  mov bx, #0x0000
+  xor bx, bx
   mov di, #0x00
 pci_pro_devloop:
   call pci_pro_select_reg
@@ -9206,6 +9586,27 @@ pci_pro_nextdev:
   inc bx
   cmp bx, #0x0100
   jne pci_pro_devloop
+  mov ah, #0x86
+  jmp pci_pro_fail
+pci_pro_f03: ;; find class code
+  cmp al, #0x03
+  jne pci_pro_f08
+  xor bx, bx
+  mov di, #0x08
+pci_pro_devloop2:
+  call pci_pro_select_reg
+  mov dx, #0x0cfc
+  in  eax, dx
+  shr eax, #8
+  cmp eax, ecx
+  jne pci_pro_nextdev2
+  cmp si, #0
+  je  pci_pro_ok
+  dec si
+pci_pro_nextdev2:
+  inc bx
+  cmp bx, #0x0100
+  jne pci_pro_devloop2
   mov ah, #0x86
   jmp pci_pro_fail
 pci_pro_f08: ;; read configuration byte
@@ -9281,16 +9682,20 @@ pci_pro_fail:
 pci_pro_fail:
   pop edi
   pop esi
-  sti
-  popf
+#ifdef BX_QEMU
+  and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
+#endif
+  popfd
   stc
   retf
 pci_pro_ok:
   xor ah, ah
   pop edi
   pop esi
-  sti
-  popf
+#ifdef BX_QEMU
+  and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
+#endif
+  popfd
   clc
   retf
 
@@ -9317,8 +9722,14 @@ pcibios_real:
   out dx, eax
   mov dx, #0x0cfc
   in  eax, dx
-  cmp eax, #0x12378086
+#ifdef PCI_FIXED_HOST_BRIDGE
+  cmp eax, #PCI_FIXED_HOST_BRIDGE
   je  pci_present
+#else
+  ;; say ok if a device is present
+  cmp eax, #0xffffffff
+  jne  pci_present
+#endif
   pop dx
   pop eax
   mov ah, #0xff
@@ -9332,7 +9743,7 @@ pci_present:
   mov ax, #0x0001
   mov bx, #0x0210
   mov cx, #0
-  mov edx, #0x20494350
+  mov edx, #0x20494350 ;; "PCI "
   mov edi, #0xf0000
   mov di, #pcibios_protected
   clc
@@ -9341,10 +9752,10 @@ pci_real_f02: ;; find pci device
   push esi
   push edi
   cmp al, #0x02
-  jne pci_real_f08
+  jne pci_real_f03
   shl ecx, #16
   mov cx, dx
-  mov bx, #0x0000
+  xor bx, bx
   mov di, #0x00
 pci_real_devloop:
   call pci_real_select_reg
@@ -9361,7 +9772,30 @@ pci_real_nextdev:
   jne pci_real_devloop
   mov dx, cx
   shr ecx, #16
-  mov ah, #0x86
+  mov ax, #0x8602
+  jmp pci_real_fail
+pci_real_f03: ;; find class code
+  cmp al, #0x03
+  jne pci_real_f08
+  xor bx, bx
+  mov di, #0x08
+pci_real_devloop2:
+  call pci_real_select_reg
+  mov dx, #0x0cfc
+  in  eax, dx
+  shr eax, #8
+  cmp eax, ecx
+  jne pci_real_nextdev2
+  cmp si, #0
+  je  pci_real_ok
+  dec si
+pci_real_nextdev2:
+  inc bx
+  cmp bx, #0x0100
+  jne pci_real_devloop2
+  mov dx, cx
+  shr ecx, #16
+  mov ax, #0x8603
   jmp pci_real_fail
 pci_real_f08: ;; read configuration byte
   cmp al, #0x08
@@ -9423,7 +9857,7 @@ pci_real_f0c: ;; write configuration wor
   jmp pci_real_ok
 pci_real_f0d: ;; write configuration dword
   cmp al, #0x0d
-  jne pci_real_unknown
+  jne pci_real_f0e
   call pci_real_select_reg
   push dx
   mov dx, #0x0cfc
@@ -9431,6 +9865,46 @@ pci_real_f0d: ;; write configuration dwo
   out dx, eax
   pop dx
   jmp pci_real_ok
+pci_real_f0e: ;; get irq routing options
+  cmp al, #0x0e
+  jne pci_real_unknown
+  SEG ES
+  cmp word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
+  jb pci_real_too_small
+  SEG ES
+  mov word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
+  pushf
+  push ds
+  push es
+  push cx
+  push si
+  push di
+  cld
+  mov si, #pci_routing_table_structure_start
+  push cs
+  pop ds
+  SEG ES
+  mov cx, [di+2]
+  SEG ES
+  mov es, [di+4]
+  mov di, cx
+  mov cx, #pci_routing_table_structure_end - pci_routing_table_structure_start
+  rep
+      movsb
+  pop di
+  pop si
+  pop cx
+  pop es
+  pop ds
+  popf
+  mov bx, #(1 << 9) | (1 << 11)   ;; irq 9 and 11 are used
+  jmp pci_real_ok
+pci_real_too_small:
+  SEG ES
+  mov word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
+  mov ah, #0x89
+  jmp pci_real_fail
+
 pci_real_unknown:
   mov ah, #0x81
 pci_real_fail:
@@ -9457,7 +9931,7 @@ pci_real_select_reg:
   out dx,  eax
   pop dx
   ret
-  
+
 .align 16
 pci_routing_table_structure:
   db 0x24, 0x50, 0x49, 0x52  ;; "$PIR" signature
@@ -9465,21 +9939,22 @@ pci_routing_table_structure:
   dw 32 + (6 * 16) ;; table size
   db 0 ;; PCI interrupt router bus
   db 0x08 ;; PCI interrupt router DevFunc
-  dw 0x0000 ;; PCI exclusive IRQs 
+  dw 0x0000 ;; PCI exclusive IRQs
   dw 0x8086 ;; compatible PCI interrupt router vendor ID
-  dw 0x7000 ;; compatible PCI interrupt router device ID
+  dw 0x122e ;; compatible PCI interrupt router device ID
   dw 0,0 ;; Miniport data
   db 0,0,0,0,0,0,0,0,0,0,0 ;; reserved
-  db 0x07 ;; checksum
+  db 0x37 ;; checksum
+pci_routing_table_structure_start:
   ;; first slot entry PCI-to-ISA (embedded)
   db 0 ;; pci bus number
   db 0x08 ;; pci device number (bit 7-3)
   db 0x61 ;; link value INTA#: pointer into PCI2ISA config space
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x62 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x63 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x60 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 0 ;; physical slot (0 = embedded)
@@ -9488,11 +9963,11 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x10 ;; pci device number (bit 7-3)
   db 0x62 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x63 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x60 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x61 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 1 ;; physical slot (0 = embedded)
@@ -9501,11 +9976,11 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x18 ;; pci device number (bit 7-3)
   db 0x63 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x60 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x61 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x62 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 2 ;; physical slot (0 = embedded)
@@ -9514,11 +9989,11 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x20 ;; pci device number (bit 7-3)
   db 0x60 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x61 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x62 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x63 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 3 ;; physical slot (0 = embedded)
@@ -9527,11 +10002,11 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x28 ;; pci device number (bit 7-3)
   db 0x61 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x62 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x63 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x60 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 4 ;; physical slot (0 = embedded)
@@ -9540,16 +10015,351 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x30 ;; pci device number (bit 7-3)
   db 0x62 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x63 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x60 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x61 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 5 ;; physical slot (0 = embedded)
   db 0 ;; reserved
+pci_routing_table_structure_end:
+
+#if !BX_ROMBIOS32
+pci_irq_list:
+  db 11, 10, 9, 5;
+
+pcibios_init_sel_reg:
+  push eax
+  mov eax, #0x800000
+  mov ax,  bx
+  shl eax, #8
+  and dl,  #0xfc
+  or  al,  dl
+  mov dx,  #0x0cf8
+  out dx,  eax
+  pop eax
+  ret
+
+pcibios_init_iomem_bases:
+  push bp
+  mov  bp, sp
+  mov  eax, #0xe0000000 ;; base for memory init
+  push eax
+  mov  ax, #0xc000 ;; base for i/o init
+  push ax
+  mov  ax, #0x0010 ;; start at base address #0
+  push ax
+  mov  bx, #0x0008
+pci_init_io_loop1:
+  mov  dl, #0x00
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   ax, dx
+  cmp  ax, #0xffff
+  jz   next_pci_dev
+  mov  dl, #0x04 ;; disable i/o and memory space access
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   al, dx
+  and  al, #0xfc
+  out  dx, al
+pci_init_io_loop2:
+  mov  dl, [bp-8]
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   eax, dx
+  test al, #0x01
+  jnz  init_io_base
+  mov  ecx, eax
+  mov  eax, #0xffffffff
+  out  dx, eax
+  in   eax, dx
+  cmp  eax, ecx
+  je   next_pci_base
+  xor  eax, #0xffffffff
+  mov  ecx, eax
+  mov  eax, [bp-4]
+  out  dx, eax
+  add  eax, ecx ;; calculate next free mem base
+  add  eax, #0x01000000
+  and  eax, #0xff000000
+  mov  [bp-4], eax
+  jmp  next_pci_base
+init_io_base:
+  mov  cx, ax
+  mov  ax, #0xffff
+  out  dx, ax
+  in   ax, dx
+  cmp  ax, cx
+  je   next_pci_base
+  xor  ax, #0xfffe
+  mov  cx, ax
+  mov  ax, [bp-6]
+  out  dx, ax
+  add  ax, cx ;; calculate next free i/o base
+  add  ax, #0x0100
+  and  ax, #0xff00
+  mov  [bp-6], ax
+next_pci_base:
+  mov  al, [bp-8]
+  add  al, #0x04
+  cmp  al, #0x28
+  je   enable_iomem_space
+  mov  byte ptr[bp-8], al
+  jmp  pci_init_io_loop2
+enable_iomem_space:
+  mov  dl, #0x04 ;; enable i/o and memory space access if available
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   al, dx
+  or   al, #0x07
+  out  dx, al
+next_pci_dev:
+  mov  byte ptr[bp-8], #0x10
+  inc  bx
+  cmp  bx, #0x0100
+  jne  pci_init_io_loop1
+  mov  sp, bp
+  pop  bp
+  ret
+
+pcibios_init_set_elcr:
+  push ax
+  push cx
+  mov  dx, #0x04d0
+  test al, #0x08
+  jz   is_master_pic
+  inc  dx
+  and  al, #0x07
+is_master_pic:
+  mov  cl, al
+  mov  bl, #0x01
+  shl  bl, cl
+  in   al, dx
+  or   al, bl
+  out  dx, al
+  pop  cx
+  pop  ax
+  ret
+
+pcibios_init_irqs:
+  push ds
+  push bp
+  mov  ax, #0xf000
+  mov  ds, ax
+  mov  dx, #0x04d0 ;; reset ELCR1 + ELCR2
+  mov  al, #0x00
+  out  dx, al
+  inc  dx
+  out  dx, al
+  mov  si, #pci_routing_table_structure
+  mov  bh, [si+8]
+  mov  bl, [si+9]
+  mov  dl, #0x00
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   eax, dx
+  cmp  eax, [si+12] ;; check irq router
+  jne  pci_init_end
+  mov  dl, [si+34]
+  call pcibios_init_sel_reg
+  push bx ;; save irq router bus + devfunc
+  mov  dx, #0x0cfc
+  mov  ax, #0x8080
+  out  dx, ax ;; reset PIRQ route control
+  add  dx, #2
+  out  dx, ax
+  mov  ax, [si+6]
+  sub  ax, #0x20
+  shr  ax, #4
+  mov  cx, ax
+  add  si, #0x20 ;; set pointer to 1st entry
+  mov  bp, sp
+  mov  ax, #pci_irq_list
+  push ax
+  xor  ax, ax
+  push ax
+pci_init_irq_loop1:
+  mov  bh, [si]
+  mov  bl, [si+1]
+pci_init_irq_loop2:
+  mov  dl, #0x00
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   ax, dx
+  cmp  ax, #0xffff
+  jnz  pci_test_int_pin
+  test bl, #0x07
+  jz   next_pir_entry
+  jmp  next_pci_func
+pci_test_int_pin:
+  mov  dl, #0x3c
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfd
+  in   al, dx
+  and  al, #0x07
+  jz   next_pci_func
+  dec  al ;; determine pirq reg
+  mov  dl, #0x03
+  mul  al, dl
+  add  al, #0x02
+  xor  ah, ah
+  mov  bx, ax
+  mov  al, [si+bx]
+  mov  dl, al
+  mov  bx, [bp]
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  and  al, #0x03
+  add  dl, al
+  in   al, dx
+  cmp  al, #0x80
+  jb   pirq_found
+  mov  bx, [bp-2] ;; pci irq list pointer
+  mov  al, [bx]
+  out  dx, al
+  inc  bx
+  mov  [bp-2], bx
+  call pcibios_init_set_elcr
+pirq_found:
+  mov  bh, [si]
+  mov  bl, [si+1]
+  add  bl, [bp-3] ;; pci function number
+  mov  dl, #0x3c
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  out  dx, al
+next_pci_func:
+  inc  byte ptr[bp-3]
+  inc  bl
+  test bl, #0x07
+  jnz  pci_init_irq_loop2
+next_pir_entry:
+  add  si, #0x10
+  mov  byte ptr[bp-3], #0x00
+  loop pci_init_irq_loop1
+  mov  sp, bp
+  pop  bx
+pci_init_end:
+  pop  bp
+  pop  ds
+  ret
+#endif // !BX_ROMBIOS32
 #endif // BX_PCIBIOS
+
+#if BX_ROMBIOS32
+rombios32_init:
+  ;; save a20 and enable it
+  in al, 0x92
+  push ax
+  or al, #0x02
+  out 0x92, al
+
+  ;; save SS:SP to the BDA
+  xor ax, ax
+  mov ds, ax
+  mov 0x0469, ss
+  mov 0x0467, sp
+
+  SEG CS
+    lidt [pmode_IDT_info]
+  SEG CS
+    lgdt [rombios32_gdt_48]
+  ;; set PE bit in CR0
+  mov  eax, cr0
+  or   al, #0x01
+  mov  cr0, eax
+  ;; start protected mode code: ljmpl 0x10:rombios32_init1
+  db 0x66, 0xea
+  dw rombios32_05
+  dw 0x000f       ;; high 16 bit address
+  dw 0x0010
+
+use32 386
+rombios32_05:
+  ;; init data segments
+  mov eax, #0x18
+  mov ds, ax
+  mov es, ax
+  mov ss, ax
+  xor eax, eax
+  mov fs, ax
+  mov gs, ax
+  cld
+
+  ;; init the stack pointer to point below EBDA
+  mov ax, [0x040e]
+  shl eax, #4
+  mov esp, #-0x10
+  add esp, eax
+
+  ;; pass pointer to s3_resume_flag and s3_resume_vector to rombios32
+  push #0x04b0
+  push #0x04b2
+
+  ;; call rombios32 code
+  mov eax, #0x000e0000
+  call eax
+
+  ;; return to 16 bit protected mode first
+  db 0xea
+  dd rombios32_10
+  dw 0x20
+
+use16 386
+rombios32_10:
+  ;; restore data segment limits to 0xffff
+  mov ax, #0x28
+  mov ds, ax
+  mov es, ax
+  mov ss, ax
+  mov fs, ax
+  mov gs, ax
+
+  ;; reset PE bit in CR0
+  mov  eax, cr0
+  and  al, #0xFE
+  mov  cr0, eax
+
+  ;; far jump to flush CPU queue after transition to real mode
+  JMP_AP(0xf000, rombios32_real_mode)
+
+rombios32_real_mode:
+  ;; restore IDT to normal real-mode defaults
+  SEG CS
+    lidt [rmode_IDT_info]
+
+  xor ax, ax
+  mov ds, ax
+  mov es, ax
+  mov fs, ax
+  mov gs, ax
+
+  ;; restore SS:SP from the BDA
+  mov ss, 0x0469
+  xor esp, esp
+  mov sp, 0x0467
+  ;; restore a20
+  pop ax
+  out 0x92, al
+  ret
+
+rombios32_gdt_48:
+  dw 0x30
+  dw rombios32_gdt
+  dw 0x000f
+
+rombios32_gdt:
+  dw 0, 0, 0, 0
+  dw 0, 0, 0, 0
+  dw 0xffff, 0, 0x9b00, 0x00cf ; 32 bit flat code segment (0x10)
+  dw 0xffff, 0, 0x9300, 0x00cf ; 32 bit flat data segment (0x18)
+  dw 0xffff, 0, 0x9b0f, 0x0000 ; 16 bit code segment base=0xf0000 limit=0xffff
+  dw 0xffff, 0, 0x9300, 0x0000 ; 16 bit data segment base=0x0 limit=0xffff
+#endif // BX_ROMBIOS32
+
 
 ; parallel port detection: base address in DX, index in BX, timeout in CL
 detect_parport:
@@ -9621,13 +10431,12 @@ checksum_loop:
   ret
 
 
-;; We need a copy of this string, but we are not actually a PnP BIOS, 
+;; We need a copy of this string, but we are not actually a PnP BIOS,
 ;; so make sure it is *not* aligned, so OSes will not see it if they scan.
 .align 16
   db 0
 pnp_string:
   .ascii "$PnP"
-
 
 rom_scan:
   ;; Scan for existence of valid expansion ROMS.
@@ -9645,8 +10454,9 @@ rom_scan:
 #if BX_TCGBIOS
   call _tcpa_start_option_rom_scan    /* specs: 3.2.3.3 + 10.4.3 */
 #endif
-  mov  cx, #0xc000
+
 rom_scan_loop:
+  push ax       ;; Save AX
   mov  ds, cx
   mov  ax, #0x0004 ;; start with increment of 4 (512-byte) blocks = 2k
   cmp [0], #0xAA55 ;; look for signature
@@ -9663,6 +10473,8 @@ rom_scan_loop:
   add  al, #0x04
 block_count_rounded:
 
+  xor  bx, bx   ;; Restore DS back to 0000:
+  mov  ds, bx
 #if BX_TCGBIOS
   push ax
   push ds
@@ -9673,7 +10485,7 @@ block_count_rounded:
   push ecx       ;; segment where option rom is located at
   call _tcpa_option_rom                   /* specs: 3.2.3.3 */
   add sp, #4    ;; pop segment
-  pop ecx      ;; original ecx
+  pop ecx      ;; original ecx
   pop ds
   pop ax
 #endif
@@ -9697,11 +10509,11 @@ fetch_bdf:
   xor  ax, ax
   mov  al, [bx]
 
-  ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.  
+  ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
   ;; That should stop it grabbing INT 19h; we will use its BEV instead.
   mov  bx, #0xf000
   mov  es, bx
-  lea  di, pnp_string 
+  lea  di, pnp_string
 
   xor  bx, bx   ;; Restore DS back to 0000:
   mov  ds, bx
@@ -9714,8 +10526,8 @@ fetch_bdf:
   add  sp, #2   ;; Pop offset value
   pop  cx       ;; Pop seg value (restore CX)
 
-  ;; Look at the ROM's PnP Expansion header.  Properly, we're supposed 
-  ;; to init all the ROMs and then go back and build an IPL table of 
+  ;; Look at the ROM's PnP Expansion header.  Properly, we're supposed
+  ;; to init all the ROMs and then go back and build an IPL table of
   ;; all the bootable devices, but we can get away with one pass.
   mov  ds, cx       ;; ROM base
   mov  bx, 0x001a   ;; 0x1A is the offset into ROM header that contains...
@@ -9723,22 +10535,54 @@ fetch_bdf:
   cmp  ax, #0x5024  ;; we look for signature "$PnP"
   jne  no_bev
   mov  ax, 2[bx]
-  cmp  ax, #0x506e 
+  cmp  ax, #0x506e
   jne  no_bev
+
+  mov  ax, 0x16[bx] ;; 0x16 is the offset of Boot Connection Vector
+  cmp  ax, #0x0000
+  je   no_bcv
+
+  ;; Option ROM has BCV. Run it now.
+  push cx       ;; Push seg
+  push ax       ;; Push offset
+
+  ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
+  mov  bx, #0xf000
+  mov  es, bx
+  lea  di, pnp_string
+  /* jump to BCV function entry pointer */
+  mov  bp, sp   ;; Call ROM BCV routine using seg:off on stack
+  db   0xff     ;; call_far ss:[bp+0]
+  db   0x5e
+  db   0
+  cli           ;; In case expansion ROM BIOS turns IF on
+  add  sp, #2   ;; Pop offset value
+  pop  cx       ;; Pop seg value (restore CX)
+  jmp   no_bev
+
+no_bcv:
   mov  ax, 0x1a[bx] ;; 0x1A is also the offset into the expansion header of...
   cmp  ax, #0x0000  ;; the Bootstrap Entry Vector, or zero if there is none.
   je   no_bev
 
-  ;; Found a device that thinks it can boot the system.  Record its BEV.
-  mov  bx, #IPL_SEG            ;; Go to the segment where the IPL table lives 
+  ;; Found a device that thinks it can boot the system.  Record its BEV and product name string.
+  mov  di, 0x10[bx]            ;; Pointer to the product name string or zero if none
+  xor  bx, bx
   mov  ds, bx
+  mov  bx, word ptr [0x40E]    ;; EBDA segment
+  mov  ds, bx                  ;; Go to the segment where the IPL table lives
   mov  bx, IPL_COUNT_OFFSET    ;; Read the number of entries so far
   cmp  bx, #IPL_TABLE_ENTRIES
   je   no_bev                  ;; Get out if the table is full
   shl  bx, #0x4                ;; Turn count into offset (entries are 16 bytes)
-  mov  0[bx], #0x80            ;; This entry is a BEV device
-  mov  6[bx], cx               ;; Build a far pointer from the segment...
-  mov  4[bx], ax               ;; and the offset
+  mov  IPL_TABLE_OFFSET+0[bx], #IPL_TYPE_BEV ;; This entry is a BEV device
+  mov  IPL_TABLE_OFFSET+6[bx], cx            ;; Build a far pointer from the segment...
+  mov  IPL_TABLE_OFFSET+4[bx], ax            ;; and the offset
+  cmp  di, #0x0000
+  je   no_prod_str
+  mov  0xA[bx], cx             ;; Build a far pointer from the segment...
+  mov  8[bx], di               ;; and the offset
+no_prod_str:
   shr  bx, #0x4                ;; Turn the offset back into a count
   inc  bx                      ;; We have one more entry now
   mov  IPL_COUNT_OFFSET, bx    ;; Remember that.
@@ -9750,7 +10594,8 @@ rom_scan_increment:
   shl  ax, #5   ;; convert 512-bytes blocks to 16-byte increments
                 ;; because the segment selector is shifted left 4 bits.
   add  cx, ax
-  cmp  cx, #0xe000
+  pop  ax       ;; Restore AX
+  cmp  cx, ax
   jbe  rom_scan_loop
 
   xor  ax, ax   ;; Restore DS back to 0000:
@@ -9815,234 +10660,7 @@ tcpa_post_part2:
 #endif
 
 
-;; for 'C' strings and other data, insert them here with
-;; a the following hack:
-;; DATA_SEG_DEFS_HERE
-
-
-;--------
-;- POST -
-;--------
-.org 0xe05b ; POST Entry Point
-post:
-
-  xor ax, ax
-
-  ;; first reset the DMA controllers
-  out 0x0d,al
-  out 0xda,al
-
-  ;; then initialize the DMA controllers
-  mov al, #0xC0
-  out 0xD6, al ; cascade mode of channel 4 enabled
-  mov al, #0x00
-  out 0xD4, al ; unmask channel 4
-
-  ;; Examine CMOS shutdown status.
-  mov AL, #0x0f
-  out 0x70, AL
-  in  AL, 0x71
-
-  ;; backup status
-  mov bl, al
-
-  ;; Reset CMOS shutdown status.
-  mov AL, #0x0f
-  out 0x70, AL          ; select CMOS register Fh
-  mov AL, #0x00
-  out 0x71, AL          ; set shutdown action to normal
-
-  ;; Examine CMOS shutdown status.
-  mov al, bl
-  mov dx, #EBDA_SEG
-  mov ds, dx
-  mov [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET], AL
-
-  cli
-  mov  ax, #0xfffe
-  mov  sp, ax
-  mov  ax, #0x0000
-  mov  ds, ax
-  mov  ss, ax
-
-  ;; zero out BIOS data area (40:00..40:ff)
-  mov  es, ax
-  mov  cx, #0x0080 ;; 128 words
-  mov  di, #0x0400
-  cld
-  rep
-    stosw
-
-  call _log_bios_start
-
-  ;; set all interrupts to default handler
-  mov  bx, #0x0000    ;; offset index
-  mov  cx, #0x0100    ;; counter (256 interrupts)
-  mov  ax, #dummy_iret_handler
-  mov  dx, #0xF000
-
-post_default_ints:
-  mov  [bx], ax
-  inc  bx
-  inc  bx
-  mov  [bx], dx
-  inc  bx
-  inc  bx
-  loop post_default_ints
-
-  ;; set vector 0x79 to zero
-  ;; this is used by 'gardian angel' protection system
-  SET_INT_VECTOR(0x79, #0, #0)
-
-  ;; base memory in K 40:13 (word)
-  mov  ax, #BASE_MEM_IN_K
-  mov  0x0413, ax
-
-
-  ;; Manufacturing Test 40:12
-  ;;   zerod out above
-
-  ;; Warm Boot Flag 0040:0072
-  ;;   value of 1234h = skip memory checks
-  ;;   zerod out above
-
-
-  ;; Printer Services vector
-  SET_INT_VECTOR(0x17, #0xF000, #int17_handler)
-
-  ;; Bootstrap failure vector
-  SET_INT_VECTOR(0x18, #0xF000, #int18_handler)
-
-  ;; Bootstrap Loader vector
-  SET_INT_VECTOR(0x19, #0xF000, #int19_handler)
-
-  ;; User Timer Tick vector
-  SET_INT_VECTOR(0x1c, #0xF000, #int1c_handler)
-
-  ;; Memory Size Check vector
-  SET_INT_VECTOR(0x12, #0xF000, #int12_handler)
-
-  ;; Equipment Configuration Check vector
-  SET_INT_VECTOR(0x11, #0xF000, #int11_handler)
-
-  ;; System Services
-  SET_INT_VECTOR(0x15, #0xF000, #int15_handler)
-
-  ;; EBDA setup
-  call ebda_post
-
-  ;; PIT setup
-  SET_INT_VECTOR(0x08, #0xF000, #int08_handler)
-  ;; int 1C already points at dummy_iret_handler (above)
-  mov al, #0x34 ; timer0: binary count, 16bit count, mode 2
-  out 0x43, al
-#ifdef HVMASSIST
-  mov al, #0x0b ; #0xe90b = 20 Hz (temporary, until we fix xen/vmx support)
-  out 0x40, al ; lsb
-  mov al, #0xe9
-  out 0x40, al ; msb
-#else
-  mov al, #0x00 ; maximum count of 0000H = 18.2Hz
-  out 0x40, al
-  out 0x40, al
-#endif
-
-  ;; Keyboard
-  SET_INT_VECTOR(0x09, #0xF000, #int09_handler)
-  SET_INT_VECTOR(0x16, #0xF000, #int16_handler)
-
-  xor  ax, ax
-  mov  ds, ax
-  mov  0x0417, al /* keyboard shift flags, set 1 */
-  mov  0x0418, al /* keyboard shift flags, set 2 */
-  mov  0x0419, al /* keyboard alt-numpad work area */
-  mov  0x0471, al /* keyboard ctrl-break flag */
-  mov  0x0497, al /* keyboard status flags 4 */
-  mov  al, #0x10
-  mov  0x0496, al /* keyboard status flags 3 */
-
-
-  /* keyboard head of buffer pointer */
-  mov  bx, #0x001E
-  mov  0x041A, bx
-
-  /* keyboard end of buffer pointer */
-  mov  0x041C, bx
-
-  /* keyboard pointer to start of buffer */
-  mov  bx, #0x001E
-  mov  0x0480, bx
-
-  /* keyboard pointer to end of buffer */
-  mov  bx, #0x003E
-  mov  0x0482, bx
-
-  /* init the keyboard */
-  call _keyboard_init
-
-  ;; mov CMOS Equipment Byte to BDA Equipment Word
-  mov  ax, 0x0410
-  mov  al, #0x14
-  out  0x70, al
-  in   al, 0x71
-  mov  0x0410, ax
-
-#if BX_TCGBIOS
-  call tcpa_post_part1
-#endif
-
-  ;; Parallel setup
-  SET_INT_VECTOR(0x0F, #0xF000, #dummy_iret_handler)
-  xor ax, ax
-  mov ds, ax
-  xor bx, bx
-  mov cl, #0x14 ; timeout value
-  mov dx, #0x378 ; Parallel I/O address, port 1
-  call detect_parport
-  mov dx, #0x278 ; Parallel I/O address, port 2
-  call detect_parport
-  shl bx, #0x0e
-  mov ax, 0x410   ; Equipment word bits 14..15 determing # parallel ports
-  and ax, #0x3fff
-  or  ax, bx ; set number of parallel ports
-  mov 0x410, ax
-
-  ;; Serial setup
-  SET_INT_VECTOR(0x0C, #0xF000, #dummy_iret_handler)
-  SET_INT_VECTOR(0x14, #0xF000, #int14_handler)
-  xor bx, bx
-  mov cl, #0x0a ; timeout value
-  mov dx, #0x03f8 ; Serial I/O address, port 1
-  call detect_serial
-  mov dx, #0x02f8 ; Serial I/O address, port 2
-  call detect_serial
-  mov dx, #0x03e8 ; Serial I/O address, port 3
-  call detect_serial
-  mov dx, #0x02e8 ; Serial I/O address, port 4
-  call detect_serial
-  shl bx, #0x09
-  mov ax, 0x410   ; Equipment word bits 9..11 determing # serial ports
-  and ax, #0xf1ff
-  or  ax, bx ; set number of serial port
-  mov 0x410, ax
-
-  ;; CMOS RTC
-  SET_INT_VECTOR(0x1A, #0xF000, #int1a_handler)
-  SET_INT_VECTOR(0x4A, #0xF000, #dummy_iret_handler)
-  SET_INT_VECTOR(0x70, #0xF000, #int70_handler)
-  ;; BIOS DATA AREA 0x4CE ???
-  call timer_tick_post
-
-  ;; PS/2 mouse setup
-  SET_INT_VECTOR(0x74, #0xF000, #int74_handler)
-
-  ;; IRQ13 (FPU exception) setup
-  SET_INT_VECTOR(0x75, #0xF000, #int75_handler)
-
-  ;; Video setup
-  SET_INT_VECTOR(0x10, #0xF000, #int10_handler)
-
-  ;; PIC
+post_init_pic:
   mov al, #0x11 ; send initialisation commands
   out 0x20, al
   out 0xa0, al
@@ -10065,6 +10683,329 @@ post_default_ints:
   mov  al, #0x9f
 #endif
   out  0xa1, AL ;slave  pic: unmask IRQ 12, 13, 14
+  ret
+
+;; the following area can be used to write dynamically generated tables
+  .align 16
+bios_table_area_start:
+  dd 0xaafb4442
+  dd bios_table_area_end - bios_table_area_start - 8;
+
+;--------
+;- POST -
+;--------
+.org 0xe05b ; POST Entry Point
+post:
+
+  xor ax, ax
+
+  ;; first reset the DMA controllers
+  out 0x0d,al
+  out 0xda,al
+
+  ;; then initialize the DMA controllers
+  mov al, #0xC0
+  out 0xD6, al ; cascade mode of channel 4 enabled
+  mov al, #0x00
+  out 0xD4, al ; unmask channel 4
+
+  ;; Examine CMOS shutdown status.
+  mov AL, #0x0f
+  out 0x70, AL
+  in  AL, 0x71
+
+  ;; backup status
+  mov bl, al
+
+  ;; Reset CMOS shutdown status.
+  mov AL, #0x0f
+  out 0x70, AL          ; select CMOS register Fh
+  mov AL, #0x00
+  out 0x71, AL          ; set shutdown action to normal
+
+  ;; Examine CMOS shutdown status.
+  mov al, bl
+
+  ;; 0x00, 0x09, 0x0D+ = normal startup
+  cmp AL, #0x00
+  jz normal_post
+  cmp AL, #0x0d
+  jae normal_post
+  cmp AL, #0x09
+  je normal_post
+
+  ;; 0x05 = eoi + jmp via [0x40:0x67] jump
+  cmp al, #0x05
+  je  eoi_jmp_post
+
+  ;; 0x0A = jmp via [0x40:0x67] jump
+  cmp al, #0x0a
+  je  jmp_post_0x467
+
+  ;; 0x0B = iret via [0x40:0x67]
+  cmp al, #0x0b
+  je  iret_post_0x467
+
+  ;; 0x0C = retf via [0x40:0x67]
+  cmp al, #0x0c
+  je  retf_post_0x467
+
+  ;; Examine CMOS shutdown status.
+  ;;  0x01,0x02,0x03,0x04,0x06,0x07,0x08 = Unimplemented shutdown status.
+  push bx
+  call _shutdown_status_panic
+
+#if 0
+  HALT(__LINE__)
+  ;
+  ;#if 0
+  ;  0xb0, 0x20,       /* mov al, #0x20 */
+  ;  0xe6, 0x20,       /* out 0x20, al    ;send EOI to PIC */
+  ;#endif
+  ;
+  pop es
+  pop ds
+  popa
+  iret
+#endif
+
+normal_post:
+  ; case 0: normal startup
+
+  cli
+  mov  ax, #0xfffe
+  mov  sp, ax
+  xor  ax, ax
+  mov  ds, ax
+  mov  ss, ax
+
+  ;; Save shutdown status
+  mov 0x04b0, bl
+
+  cmp bl, #0xfe
+  jz s3_post
+
+  ;; zero out BIOS data area (40:00..40:ff)
+  mov  es, ax
+  mov  cx, #0x0080 ;; 128 words
+  mov  di, #0x0400
+  cld
+  rep
+    stosw
+
+  call _log_bios_start
+
+  ;; set all interrupts to default handler
+  xor  bx, bx         ;; offset index
+  mov  cx, #0x0100    ;; counter (256 interrupts)
+  mov  ax, #dummy_iret_handler
+  mov  dx, #0xF000
+
+post_default_ints:
+  mov  [bx], ax
+  add  bx, #2
+  mov  [bx], dx
+  add  bx, #2
+  loop post_default_ints
+
+  ;; set vector 0x79 to zero
+  ;; this is used by 'gardian angel' protection system
+  SET_INT_VECTOR(0x79, #0, #0)
+
+  ;; base memory in K 40:13 (word)
+  mov  ax, #BASE_MEM_IN_K
+  mov  0x0413, ax
+
+
+  ;; Manufacturing Test 40:12
+  ;;   zerod out above
+
+  ;; Warm Boot Flag 0040:0072
+  ;;   value of 1234h = skip memory checks
+  ;;   zerod out above
+
+
+  ;; Printer Services vector
+  SET_INT_VECTOR(0x17, #0xF000, #int17_handler)
+
+  ;; Bootstrap failure vector
+  SET_INT_VECTOR(0x18, #0xF000, #int18_handler)
+
+  ;; Bootstrap Loader vector
+  SET_INT_VECTOR(0x19, #0xF000, #int19_handler)
+
+  ;; User Timer Tick vector
+  SET_INT_VECTOR(0x1c, #0xF000, #int1c_handler)
+
+  ;; Memory Size Check vector
+  SET_INT_VECTOR(0x12, #0xF000, #int12_handler)
+
+  ;; Equipment Configuration Check vector
+  SET_INT_VECTOR(0x11, #0xF000, #int11_handler)
+
+  ;; System Services
+  SET_INT_VECTOR(0x15, #0xF000, #int15_handler)
+
+  ;; EBDA setup
+  call ebda_post
+
+  ;; PIT setup
+  SET_INT_VECTOR(0x08, #0xF000, #int08_handler)
+  ;; int 1C already points at dummy_iret_handler (above)
+  mov al, #0x34 ; timer0: binary count, 16bit count, mode 2
+  out 0x43, al
+#ifdef HVMASSIST
+  mov al, #0x0b ; #0xe90b = 20 Hz (temporary, until we fix xen/vmx support)
+  out 0x40, al ; lsb
+  mov al, #0xe9
+  out 0x40, al ; msb
+#else
+  mov al, #0x00 ; maximum count of 0000H = 18.2Hz
+  out 0x40, al
+  out 0x40, al
+#endif
+
+  ;; Keyboard
+  SET_INT_VECTOR(0x09, #0xF000, #int09_handler)
+  SET_INT_VECTOR(0x16, #0xF000, #int16_handler)
+
+  xor  ax, ax
+  mov  ds, ax
+  mov  0x0417, al /* keyboard shift flags, set 1 */
+  mov  0x0418, al /* keyboard shift flags, set 2 */
+  mov  0x0419, al /* keyboard alt-numpad work area */
+  mov  0x0471, al /* keyboard ctrl-break flag */
+  mov  0x0497, al /* keyboard status flags 4 */
+  mov  al, #0x10
+  mov  0x0496, al /* keyboard status flags 3 */
+
+
+  /* keyboard head of buffer pointer */
+  mov  bx, #0x001E
+  mov  0x041A, bx
+
+  /* keyboard end of buffer pointer */
+  mov  0x041C, bx
+
+  /* keyboard pointer to start of buffer */
+  mov  bx, #0x001E
+  mov  0x0480, bx
+
+  /* keyboard pointer to end of buffer */
+  mov  bx, #0x003E
+  mov  0x0482, bx
+
+  /* init the keyboard */
+  call _keyboard_init
+
+  ;; mov CMOS Equipment Byte to BDA Equipment Word
+  mov  ax, 0x0410
+  mov  al, #0x14
+  out  0x70, al
+  in   al, 0x71
+  mov  0x0410, ax
+
+#if BX_TCGBIOS
+  call tcpa_post_part1
+#endif
+
+  ;; Parallel setup
+  SET_INT_VECTOR(0x0F, #0xF000, #dummy_iret_handler)
+  xor ax, ax
+  mov ds, ax
+  xor bx, bx
+  mov cl, #0x14 ; timeout value
+  mov dx, #0x378 ; Parallel I/O address, port 1
+  call detect_parport
+  mov dx, #0x278 ; Parallel I/O address, port 2
+  call detect_parport
+  shl bx, #0x0e
+  mov ax, 0x410   ; Equipment word bits 14..15 determing # parallel ports
+  and ax, #0x3fff
+  or  ax, bx ; set number of parallel ports
+  mov 0x410, ax
+
+  ;; Serial setup
+  SET_INT_VECTOR(0x0C, #0xF000, #dummy_iret_handler)
+  SET_INT_VECTOR(0x14, #0xF000, #int14_handler)
+  xor bx, bx
+  mov cl, #0x0a ; timeout value
+  mov dx, #0x03f8 ; Serial I/O address, port 1
+  call detect_serial
+  mov dx, #0x02f8 ; Serial I/O address, port 2
+  call detect_serial
+  mov dx, #0x03e8 ; Serial I/O address, port 3
+  call detect_serial
+  mov dx, #0x02e8 ; Serial I/O address, port 4
+  call detect_serial
+  shl bx, #0x09
+  mov ax, 0x410   ; Equipment word bits 9..11 determing # serial ports
+  and ax, #0xf1ff
+  or  ax, bx ; set number of serial port
+  mov 0x410, ax
+
+  ;; CMOS RTC
+  SET_INT_VECTOR(0x1A, #0xF000, #int1a_handler)
+  SET_INT_VECTOR(0x4A, #0xF000, #dummy_iret_handler)
+  SET_INT_VECTOR(0x70, #0xF000, #int70_handler)
+  ;; BIOS DATA AREA 0x4CE ???
+  call timer_tick_post
+
+  ;; PS/2 mouse setup
+  SET_INT_VECTOR(0x74, #0xF000, #int74_handler)
+
+  ;; IRQ13 (FPU exception) setup
+  SET_INT_VECTOR(0x75, #0xF000, #int75_handler)
+
+  ;; Video setup
+  SET_INT_VECTOR(0x10, #0xF000, #int10_handler)
+
+  ;; PIC
+  call post_init_pic
+
+  mov  cx, #0xc000  ;; init vga bios
+  mov  ax, #0xc780
+  call rom_scan
+
+  call _print_bios_banner
+
+#if BX_ROMBIOS32
+  call rombios32_init
+#else
+#if BX_PCIBIOS
+  call pcibios_init_iomem_bases
+  call pcibios_init_irqs
+#endif //BX_PCIBIOS
+#endif
+
+  ;;
+  ;; Floppy setup
+  ;;
+  call floppy_drive_post
+
+  ;;
+  ;; Hard Drive setup
+  ;;
+  call hard_drive_post
+
+#if BX_USE_ATADRV
+
+  ;;
+  ;; ATA/ATAPI driver setup
+  ;;
+  call _ata_init
+  call _ata_detect
+  ;;
+
+#endif // BX_USE_ATADRV
+
+#if BX_ELTORITO_BOOT
+  ;;
+  ;; eltorito floppy/harddisk emulation from cd
+  ;;
+  call _cdemu_init
+  ;;
+#endif // BX_ELTORITO_BOOT
 
 #ifdef HVMASSIST
   call _enable_rom_write_access
@@ -10076,52 +11017,19 @@ post_default_ints:
 
   call _init_boot_vectors
 
+  mov  cx, #0xc800  ;; init option roms
+  mov  ax, #0xe000
   call rom_scan
 
-  call _print_bios_banner 
-
-  ;;
-  ;; Floppy setup
-  ;;
-  call floppy_drive_post
-
-#if BX_USE_ATADRV
-
-  ;;
-  ;; Hard Drive setup
-  ;;
-  call hard_drive_post
-
-  ;;
-  ;; ATA/ATAPI driver setup
-  ;;
-  call _ata_init
-  call _ata_detect
-  ;;
-#else // BX_USE_ATADRV
-
-  ;;
-  ;; Hard Drive setup
-  ;;
-  call hard_drive_post
-
-#endif // BX_USE_ATADRV
-
 #if BX_ELTORITO_BOOT
-  ;;
-  ;; eltorito floppy/harddisk emulation from cd
-  ;;
-  call _cdemu_init
-  ;;
+  call _interactive_bootkey
 #endif // BX_ELTORITO_BOOT
-
-  call _s3_resume
-  call _interactive_bootkey
 
 #if BX_TCGBIOS
   call tcpa_post_part2
 #endif
 
+  sti        ;; enable interrupts
   ;; Start the boot sequence.   See the comments in int19_relocated 
   ;; for why we use INT 18h instead of INT 19h here.
   int  #0x18
@@ -10134,7 +11042,7 @@ nmi:
   iret
 
 int75_handler:
-  out  0xf0, al         // clear irq13 
+  out  0xf0, al         // clear irq13
   call eoi_both_pics    // clear interrupt
   int  2                // legacy nmi call
   iret
@@ -10233,7 +11141,7 @@ int14_handler:
 int14_handler:
   push ds
   pusha
-  mov  ax, #0x0000
+  xor  ax, ax
   mov  ds, ax
   call _int14_function
   popa
@@ -10338,26 +11246,7 @@ int09_handler:
   jz  int09_finish
 
   in  al, #0x60             ;;read key from keyboard controller
-  //test al, #0x80            ;;look for key release
-  //jnz  int09_process_key    ;; dont pass releases to intercept?
-
-  ;; check for extended key
-  cmp  al, #0xe0
-  jne int09_call_int15_4f
-  
-  push ds
-  xor  ax, ax
-  mov  ds, ax
-  mov  al, BYTE [0x496]     ;; mf2_state |= 0x01
-  or   al, #0x01
-  mov  BYTE [0x496], al
-  pop  ds
-  
-  in  al, #0x60             ;;read another key from keyboard controller
-
   sti
-
-int09_call_int15_4f:
   push  ds
   pusha
 #ifdef BX_CALL_INT15_4F
@@ -10367,8 +11256,27 @@ int09_call_int15_4f:
   jnc  int09_done
 #endif
 
-
-//int09_process_key:
+  ;; check for extended key
+  cmp  al, #0xe0
+  jne int09_check_pause
+  xor  ax, ax
+  mov  ds, ax
+  mov  al, BYTE [0x496]     ;; mf2_state |= 0x02
+  or   al, #0x02
+  mov  BYTE [0x496], al
+  jmp int09_done
+
+int09_check_pause: ;; check for pause key
+  cmp  al, #0xe1
+  jne int09_process_key
+  xor  ax, ax
+  mov  ds, ax
+  mov  al, BYTE [0x496]     ;; mf2_state |= 0x01
+  or   al, #0x01
+  mov  BYTE [0x496], al
+  jmp int09_done
+
+int09_process_key:
   mov   bx, #0xf000
   mov   ds, bx
   call  _int09_function
@@ -10384,8 +11292,6 @@ int09_finish:
   out #0x64, al
   pop ax
   iret
-
-
 
 
 ;----------------------------------------
@@ -10426,7 +11332,7 @@ int0e_loop2:
   je int0e_loop2
 int0e_normal:
   push ds
-  mov  ax, #0x0000 ;; segment 0000
+  xor  ax, ax ;; segment 0000
   mov  ds, ax
   call eoi_master_pic
   mov  al, 0x043e
@@ -10463,7 +11369,7 @@ int17_handler:
 int17_handler:
   push ds
   pusha
-  mov  ax, #0x0000
+  xor  ax, ax
   mov  ds, ax
   call _int17_function
   popa
@@ -10653,11 +11559,11 @@ int1a_callfunction:
 ;;
 int70_handler:
   push ds
-  pusha
+  pushad
   xor  ax, ax
   mov  ds, ax
   call _int70_function
-  popa
+  popad
   pop  ds
   iret
 
@@ -10715,7 +11621,7 @@ int08_store_ticks:
 
 
 .org 0xff00
-.ascii "(c) 2002 MandrakeSoft S.A. Written by Kevin Lawton & the Bochs team."
+.ascii BIOS_COPYRIGHT_STRING
 
 ;------------------------------------------------
 ;- IRET Instruction for Dummy Interrupt Handler -
@@ -10737,7 +11643,7 @@ dummy_iret_handler:
 #ifdef HVMTEST
   jmp 0xd000:0x0003;
 #else
-  jmp 0xf000:post
+   jmp 0xf000:post
 #endif
 
 .org 0xfff5 ; ASCII Date ROM was built - 8 characters in MM/DD/YY
@@ -10750,10 +11656,10 @@ db 0x00   ; filler
 .org 0xfa6e ;; Character Font for 320x200 & 640x200 Graphics (lower 128 characters)
 ASM_END
 /*
- * This font comes from the fntcol16.zip package (c) by  Joseph Gil 
+ * This font comes from the fntcol16.zip package (c) by  Joseph Gil
  * found at ftp://ftp.simtel.net/pub/simtelnet/msdos/screen/fntcol16.zip
  * This font is public domain
- */ 
+ */
 static Bit8u vgafont8[128*8]=
 {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -10929,328 +11835,10 @@ db 0,0,0,0,0,0,0   ; 31 bytes
 db 0,0,0,0,0,0,0   ; 31 bytes
 ASM_END
 
-#else // !HVMASSIST
-
+#endif // HVMASSIST
 ASM_START
-.org 0xcc00
+.org 0xcff0
+bios_table_area_end:
 // bcc-generated data will be placed here
-
-// For documentation of this config structure, look on developer.intel.com and
-// search for multiprocessor specification.  Note that when you change anything
-// you must update the checksum (a pain!).  It would be better to construct this
-// with C structures, or at least fill in the checksum automatically.
-//
-// Maybe this structs could be moved elsewhere than d000
-
-#if (BX_SMP_PROCESSORS==1)
-  // no structure necessary.
-#elif (BX_SMP_PROCESSORS==2)
-// define the Intel MP Configuration Structure for 2 processors at
-// APIC ID 0,1.  I/O APIC at ID=2.
-.align 16
-mp_config_table:
-  db 0x50, 0x43, 0x4d, 0x50  ;; "PCMP" signature
-  dw (mp_config_end-mp_config_table)  ;; table length
-  db 4 ;; spec rev
-  db 0x65 ;; checksum
-  .ascii "BOCHSCPU"     ;; OEM id = "BOCHSCPU"
-  db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1         "
-  db 0x20, 0x20, 0x20, 0x20 
-  db 0x20, 0x20, 0x20, 0x20
-  dw 0,0 ;; oem table ptr
-  dw 0 ;; oem table size
-  dw 20 ;; entry count
-  dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
-  dw 0 ;; extended table length
-  db 0 ;; extended table checksum
-  db 0 ;; reserved
-mp_config_proc0:
-  db 0 ;; entry type=processor
-  db 0 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 3 ;; cpu flags: enabled, bootstrap processor
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc1:
-  db 0 ;; entry type=processor
-  db 1 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_isa_bus:
-  db 1 ;; entry type=bus
-  db 0 ;; bus ID
-  db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20  ;; bus type="ISA   "
-mp_config_ioapic:
-  db 2 ;; entry type=I/O APIC
-  db 2 ;; apic id=2. linux will set.
-  db 0x11 ;; I/O APIC version number
-  db 1 ;; flags=1=enabled
-  dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
-mp_config_irqs:
-  db 3 ;; entry type=I/O interrupt
-  db 0 ;; interrupt type=vectored interrupt
-  db 0,0 ;; flags po=0, el=0 (linux uses as default)
-  db 0 ;; source bus ID is ISA
-  db 0 ;; source bus IRQ
-  db 2 ;; destination I/O APIC ID
-  db 0 ;; destination I/O APIC interrrupt in
-  ;; repeat pattern for interrupts 0-15
-  db 3,0,0,0,0,1,2,1
-  db 3,0,0,0,0,2,2,2
-  db 3,0,0,0,0,3,2,3
-  db 3,0,0,0,0,4,2,4
-  db 3,0,0,0,0,5,2,5
-  db 3,0,0,0,0,6,2,6
-  db 3,0,0,0,0,7,2,7
-  db 3,0,0,0,0,8,2,8
-  db 3,0,0,0,0,9,2,9
-  db 3,0,0,0,0,10,2,10
-  db 3,0,0,0,0,11,2,11
-  db 3,0,0,0,0,12,2,12
-  db 3,0,0,0,0,13,2,13
-  db 3,0,0,0,0,14,2,14
-  db 3,0,0,0,0,15,2,15
-#elif (BX_SMP_PROCESSORS==4)
-// define the Intel MP Configuration Structure for 4 processors at
-// APIC ID 0,1,2,3.  I/O APIC at ID=4.
-.align 16
-mp_config_table:
-  db 0x50, 0x43, 0x4d, 0x50  ;; "PCMP" signature
-  dw (mp_config_end-mp_config_table)  ;; table length
-  db 4 ;; spec rev
-  db 0xdd ;; checksum
-  .ascii "BOCHSCPU"     ;; OEM id = "BOCHSCPU"
-  db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1         "
-  db 0x20, 0x20, 0x20, 0x20 
-  db 0x20, 0x20, 0x20, 0x20
-  dw 0,0 ;; oem table ptr
-  dw 0 ;; oem table size
-  dw 22 ;; entry count
-  dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
-  dw 0 ;; extended table length
-  db 0 ;; extended table checksum
-  db 0 ;; reserved
-mp_config_proc0:
-  db 0 ;; entry type=processor
-  db 0 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 3 ;; cpu flags: enabled, bootstrap processor
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc1:
-  db 0 ;; entry type=processor
-  db 1 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc2:
-  db 0 ;; entry type=processor
-  db 2 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc3:
-  db 0 ;; entry type=processor
-  db 3 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_isa_bus:
-  db 1 ;; entry type=bus
-  db 0 ;; bus ID
-  db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20  ;; bus type="ISA   "
-mp_config_ioapic:
-  db 2 ;; entry type=I/O APIC
-  db 4 ;; apic id=4. linux will set.
-  db 0x11 ;; I/O APIC version number
-  db 1 ;; flags=1=enabled
-  dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
-mp_config_irqs:
-  db 3 ;; entry type=I/O interrupt
-  db 0 ;; interrupt type=vectored interrupt
-  db 0,0 ;; flags po=0, el=0 (linux uses as default)
-  db 0 ;; source bus ID is ISA
-  db 0 ;; source bus IRQ
-  db 4 ;; destination I/O APIC ID
-  db 0 ;; destination I/O APIC interrrupt in
-  ;; repeat pattern for interrupts 0-15
-  db 3,0,0,0,0,1,4,1
-  db 3,0,0,0,0,2,4,2
-  db 3,0,0,0,0,3,4,3
-  db 3,0,0,0,0,4,4,4
-  db 3,0,0,0,0,5,4,5
-  db 3,0,0,0,0,6,4,6
-  db 3,0,0,0,0,7,4,7
-  db 3,0,0,0,0,8,4,8
-  db 3,0,0,0,0,9,4,9
-  db 3,0,0,0,0,10,4,10
-  db 3,0,0,0,0,11,4,11
-  db 3,0,0,0,0,12,4,12
-  db 3,0,0,0,0,13,4,13
-  db 3,0,0,0,0,14,4,14
-  db 3,0,0,0,0,15,4,15
-#elif (BX_SMP_PROCESSORS==8)
-// define the Intel MP Configuration Structure for 8 processors at
-// APIC ID 0,1,2,3,4,5,6,7.  I/O APIC at ID=8.
-.align 16
-mp_config_table:
-  db 0x50, 0x43, 0x4d, 0x50  ;; "PCMP" signature
-  dw (mp_config_end-mp_config_table)  ;; table length
-  db 4 ;; spec rev
-  db 0xc3 ;; checksum
-  .ascii "BOCHSCPU"     ;; OEM id = "BOCHSCPU"
-  db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1         "
-  db 0x20, 0x20, 0x20, 0x20 
-  db 0x20, 0x20, 0x20, 0x20
-  dw 0,0 ;; oem table ptr
-  dw 0 ;; oem table size
-  dw 26 ;; entry count
-  dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
-  dw 0 ;; extended table length
-  db 0 ;; extended table checksum
-  db 0 ;; reserved
-mp_config_proc0:
-  db 0 ;; entry type=processor
-  db 0 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 3 ;; cpu flags: enabled, bootstrap processor
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc1:
-  db 0 ;; entry type=processor
-  db 1 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc2:
-  db 0 ;; entry type=processor
-  db 2 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc3:
-  db 0 ;; entry type=processor
-  db 3 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc4:
-  db 0 ;; entry type=processor
-  db 4 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc5:
-  db 0 ;; entry type=processor
-  db 5 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc6:
-  db 0 ;; entry type=processor
-  db 6 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc7:
-  db 0 ;; entry type=processor
-  db 7 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_isa_bus:
-  db 1 ;; entry type=bus
-  db 0 ;; bus ID
-  db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20  ;; bus type="ISA   "
-mp_config_ioapic:
-  db 2 ;; entry type=I/O APIC
-  db 8 ;; apic id=8
-  db 0x11 ;; I/O APIC version number
-  db 1 ;; flags=1=enabled
-  dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
-mp_config_irqs:
-  db 3 ;; entry type=I/O interrupt
-  db 0 ;; interrupt type=vectored interrupt
-  db 0,0 ;; flags po=0, el=0 (linux uses as default)
-  db 0 ;; source bus ID is ISA
-  db 0 ;; source bus IRQ
-  db 8 ;; destination I/O APIC ID
-  db 0 ;; destination I/O APIC interrrupt in
-  ;; repeat pattern for interrupts 0-15
-  db 3,0,0,0,0,1,8,1
-  db 3,0,0,0,0,2,8,2
-  db 3,0,0,0,0,3,8,3
-  db 3,0,0,0,0,4,8,4
-  db 3,0,0,0,0,5,8,5
-  db 3,0,0,0,0,6,8,6
-  db 3,0,0,0,0,7,8,7
-  db 3,0,0,0,0,8,8,8
-  db 3,0,0,0,0,9,8,9
-  db 3,0,0,0,0,10,8,10
-  db 3,0,0,0,0,11,8,11
-  db 3,0,0,0,0,12,8,12
-  db 3,0,0,0,0,13,8,13
-  db 3,0,0,0,0,14,8,14
-  db 3,0,0,0,0,15,8,15
-#else
-#  error Sorry, rombios only has configurations for 1, 2, 4 or 8 processors.
-#endif  // if (BX_SMP_PROCESSORS==...)
-
-mp_config_end:   // this label used to find length of mp structure
- db 0
-
-#if (BX_SMP_PROCESSORS>1)
-.align 16
-mp_floating_pointer_structure:
-db 0x5f, 0x4d, 0x50, 0x5f   ; "_MP_" signature
-dw mp_config_table, 0xf ;; pointer to MP configuration table
-db 1     ;; length of this struct in 16-bit byte chunks
-db 4     ;; MP spec revision
-db 0xc1  ;; checksum
-db 0     ;; MP feature byte 1.  value 0 means look at the config table
-db 0,0,0,0     ;; MP feature bytes 2-5.
-#endif
-
 ASM_END
 
-#endif // HVMASSIST
diff -r 9837303a4708 -r 07f26e047fbf tools/firmware/rombios/rombios.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/firmware/rombios/rombios.h  Wed Dec 24 12:52:34 2008 +0900
@@ -0,0 +1,70 @@
+/////////////////////////////////////////////////////////////////////////
+// $Id: rombios.h,v 1.8 2008/12/04 18:48:33 sshwarts Exp $
+/////////////////////////////////////////////////////////////////////////
+//
+//  Copyright (C) 2006 Volker Ruppert
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+/* define it to include QEMU specific code */
+//#define BX_QEMU
+#define LEGACY
+
+#ifndef LEGACY
+#  define BX_ROMBIOS32     1
+#else
+#  define BX_ROMBIOS32     0
+#endif
+#define DEBUG_ROMBIOS    1
+
+#define PANIC_PORT  0x400
+#define PANIC_PORT2 0x401
+#define INFO_PORT   0x402
+#define DEBUG_PORT  0x403
+
+#define BIOS_PRINTF_HALT     1
+#define BIOS_PRINTF_SCREEN   2
+#define BIOS_PRINTF_INFO     4
+#define BIOS_PRINTF_DEBUG    8
+#define BIOS_PRINTF_ALL      (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO)
+#define BIOS_PRINTF_DEBHALT  (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO | BIOS_PRINTF_HALT)
+
+#define printf(format, p...)  bios_printf(BIOS_PRINTF_SCREEN, format, ##p)
+
+// Defines the output macros.
+// BX_DEBUG goes to INFO port until we can easily choose debug info on a
+// per-device basis. Debug info are sent only in debug mode
+#if DEBUG_ROMBIOS
+#  define BX_DEBUG(format, p...)  bios_printf(BIOS_PRINTF_INFO, format, ##p)
+#else
+#  define BX_DEBUG(format, p...)
+#endif
+#define BX_INFO(format, p...)   bios_printf(BIOS_PRINTF_INFO, format, ##p)
+#define BX_PANIC(format, p...)  bios_printf(BIOS_PRINTF_DEBHALT, format, ##p)
+
+#define ACPI_DATA_SIZE    0x00010000L
+#define PM_IO_BASE        0xb000
+#define SMB_IO_BASE       0xb100
+
+  // Define the application NAME; the first build flag that is defined wins
+#if defined(HVMASSIST)
+#  define BX_APPNAME "HVMAssist"
+#elif defined(BX_QEMU)
+#  define BX_APPNAME "QEMU"
+#elif defined(PLEX86)
+#  define BX_APPNAME "Plex86"
+#else
+#  define BX_APPNAME "Bochs"
+#endif
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_dom_core.c
--- a/tools/libxc/xc_dom_core.c Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_dom_core.c Wed Dec 24 12:52:34 2008 +0900
@@ -244,6 +244,7 @@ int xc_dom_do_gunzip(void *src, size_t s
         return -1;
     }
     rc = inflate(&zStream, Z_FINISH);
+    inflateEnd(&zStream);
     if ( rc != Z_STREAM_END )
     {
         xc_dom_panic(XC_INTERNAL_ERROR,
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_dom_x86.c
--- a/tools/libxc/xc_dom_x86.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_dom_x86.c  Wed Dec 24 12:52:34 2008 +0900
@@ -418,7 +418,8 @@ static int start_info_x86_32(struct xc_d
     xc_dom_printf("%s: called\n", __FUNCTION__);
 
     memset(start_info, 0, sizeof(*start_info));
-    snprintf(start_info->magic, sizeof(start_info->magic), dom->guest_type);
+    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
+    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
     start_info->nr_pages = dom->total_pages;
     start_info->shared_info = shinfo << PAGE_SHIFT_X86;
     start_info->pt_base = dom->pgtables_seg.vstart;
@@ -457,7 +458,8 @@ static int start_info_x86_64(struct xc_d
     xc_dom_printf("%s: called\n", __FUNCTION__);
 
     memset(start_info, 0, sizeof(*start_info));
-    snprintf(start_info->magic, sizeof(start_info->magic), dom->guest_type);
+    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
+    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
     start_info->nr_pages = dom->total_pages;
     start_info->shared_info = shinfo << PAGE_SHIFT_X86;
     start_info->pt_base = dom->pgtables_seg.vstart;
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_domain.c   Wed Dec 24 12:52:34 2008 +0900
@@ -1061,6 +1061,20 @@ int xc_domain_suppress_spurious_page_fau
 
 }
 
+int xc_domain_debug_control(int xc, uint32_t domid, uint32_t sop, uint32_t vcpu)
+{
+    DECLARE_DOMCTL;
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = (domid_t)domid;
+    domctl.cmd = XEN_DOMCTL_debug_op;
+    domctl.u.debug_op.op     = sop;
+    domctl.u.debug_op.vcpu   = vcpu;
+
+    return do_domctl(xc, &domctl);
+}
+
+
 /*
  * Local variables:
  * mode: C
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_domain_restore.c   Wed Dec 24 12:52:34 2008 +0900
@@ -490,6 +490,22 @@ int xc_domain_restore(int xc_handle, int
             continue;
         }
 
+        if ( j == -4 )
+        {
+            uint64_t vm86_tss;
+
+            /* Skip padding 4 bytes then read the vm86 TSS location. */
+            if ( read_exact(io_fd, &vm86_tss, sizeof(uint32_t)) ||
+                 read_exact(io_fd, &vm86_tss, sizeof(uint64_t)) )
+            {
+                ERROR("error read the address of the vm86 TSS");
+                goto out;
+            }
+
+            xc_set_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, vm86_tss);
+            continue;
+        }
+
         if ( j == 0 )
             break;  /* our work here is done */
 
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_domain_save.c      Wed Dec 24 12:52:34 2008 +0900
@@ -1388,18 +1388,30 @@ int xc_domain_save(int xc_handle, int io
     if ( hvm )
     {
         struct {
-            int minusthree;
+            int id;
             uint32_t pad;
-            uint64_t ident_pt;
-        } chunk = { -3, 0 };
-
+            uint64_t data;
+        } chunk = { 0, };
+
+        chunk.id = -3;
         xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
-                         (unsigned long *)&chunk.ident_pt);
-
-        if ( (chunk.ident_pt != 0) &&
+                         (unsigned long *)&chunk.data);
+
+        if ( (chunk.data != 0) &&
              write_exact(io_fd, &chunk, sizeof(chunk)) )
         {
             PERROR("Error when writing the ident_pt for EPT guest");
+            goto out;
+        }
+
+        chunk.id = -4;
+        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS,
+                         (unsigned long *)&chunk.data);
+
+        if ( (chunk.data != 0) &&
+             write_exact(io_fd, &chunk, sizeof(chunk)) )
+        {
+            PERROR("Error when writing the vm86 TSS for guest");
             goto out;
         }
     }
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_pm.c
--- a/tools/libxc/xc_pm.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_pm.c       Wed Dec 24 12:52:34 2008 +0900
@@ -23,8 +23,13 @@
  *
  */
 
+#include <errno.h>
+#include <stdbool.h>
 #include "xc_private.h"
 
+/*
+ * Get PM statistic info
+ */
 int xc_pm_get_max_px(int xc_handle, int cpuid, int *max_px)
 {
     DECLARE_SYSCTL;
@@ -168,3 +173,136 @@ int xc_pm_reset_cxstat(int xc_handle, in
 
     return xc_sysctl(xc_handle, &sysctl);
 }
+
+
+/*
+ * 1. Get PM parameter
+ * 2. Provide user PM control
+ */
+int xc_get_cpufreq_para(int xc_handle, int cpuid,
+                        struct xc_get_cpufreq_para *user_para)
+{
+    DECLARE_SYSCTL;
+    int ret = 0;
+    struct xen_get_cpufreq_para *sys_para = &sysctl.u.pm_op.get_para;
+    bool has_num = user_para->cpu_num &&
+                     user_para->freq_num &&
+                     user_para->gov_num;
+
+    if ( (xc_handle < 0) || !user_para )
+        return -EINVAL;
+
+    if ( has_num )
+    {
+        if ( (!user_para->affected_cpus)                    ||
+             (!user_para->scaling_available_frequencies)    ||
+             (!user_para->scaling_available_governors) )
+            return -EINVAL;
+
+        if ( (ret = lock_pages(user_para->affected_cpus,
+                               user_para->cpu_num * sizeof(uint32_t))) )
+            goto unlock_1;
+        if ( (ret = lock_pages(user_para->scaling_available_frequencies,
+                               user_para->freq_num * sizeof(uint32_t))) )
+            goto unlock_2;
+        if ( (ret = lock_pages(user_para->scaling_available_governors,
+                 user_para->gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+            goto unlock_3;
+
+        set_xen_guest_handle(sys_para->affected_cpus,
+                             user_para->affected_cpus);
+        set_xen_guest_handle(sys_para->scaling_available_frequencies,
+                             user_para->scaling_available_frequencies);
+        set_xen_guest_handle(sys_para->scaling_available_governors,
+                             user_para->scaling_available_governors);
+    }
+
+    sysctl.cmd = XEN_SYSCTL_pm_op;
+    sysctl.u.pm_op.cmd = GET_CPUFREQ_PARA;
+    sysctl.u.pm_op.cpuid = cpuid;
+    sys_para->cpu_num  = user_para->cpu_num;
+    sys_para->freq_num = user_para->freq_num;
+    sys_para->gov_num  = user_para->gov_num;
+
+    ret = xc_sysctl(xc_handle, &sysctl);
+    if ( ret )
+    {
+        if ( errno == EAGAIN )
+        {
+            user_para->cpu_num  = sys_para->cpu_num;
+            user_para->freq_num = sys_para->freq_num;
+            user_para->gov_num  = sys_para->gov_num;
+            ret = -errno;
+        }
+
+        if ( has_num )
+            goto unlock_4;
+        goto unlock_1;
+    }
+    else
+    {
+        user_para->cpuinfo_cur_freq = sys_para->cpuinfo_cur_freq;
+        user_para->cpuinfo_max_freq = sys_para->cpuinfo_max_freq;
+        user_para->cpuinfo_min_freq = sys_para->cpuinfo_min_freq;
+        user_para->scaling_cur_freq = sys_para->scaling_cur_freq;
+        user_para->scaling_max_freq = sys_para->scaling_max_freq;
+        user_para->scaling_min_freq = sys_para->scaling_min_freq;
+
+        memcpy(user_para->scaling_driver, 
+                sys_para->scaling_driver, CPUFREQ_NAME_LEN);
+        memcpy(user_para->scaling_governor,
+                sys_para->scaling_governor, CPUFREQ_NAME_LEN);
+
+        /* copy to user_para no matter what cpufreq governor */
+        XC_BUILD_BUG_ON(sizeof(((struct xc_get_cpufreq_para *)0)->u) !=
+                        sizeof(((struct xen_get_cpufreq_para *)0)->u));
+
+        memcpy(&user_para->u, &sys_para->u, sizeof(sys_para->u));
+    }
+
+unlock_4:
+    unlock_pages(user_para->scaling_available_governors,
+                 user_para->gov_num * CPUFREQ_NAME_LEN * sizeof(char));
+unlock_3:
+    unlock_pages(user_para->scaling_available_frequencies,
+                 user_para->freq_num * sizeof(uint32_t));
+unlock_2:
+    unlock_pages(user_para->affected_cpus,
+                 user_para->cpu_num * sizeof(uint32_t));
+unlock_1:
+    return ret;
+}
+
+int xc_set_cpufreq_gov(int xc_handle, int cpuid, char *govname)
+{
+    DECLARE_SYSCTL;
+    char *scaling_governor = sysctl.u.pm_op.set_gov.scaling_governor;
+
+    if ( (xc_handle < 0) || (!govname) )
+        return -EINVAL;
+
+    sysctl.cmd = XEN_SYSCTL_pm_op;
+    sysctl.u.pm_op.cmd = SET_CPUFREQ_GOV;
+    sysctl.u.pm_op.cpuid = cpuid;
+    strncpy(scaling_governor, govname, CPUFREQ_NAME_LEN);
+    scaling_governor[CPUFREQ_NAME_LEN - 1] = '\0';
+
+    return xc_sysctl(xc_handle, &sysctl);
+}
+
+int xc_set_cpufreq_para(int xc_handle, int cpuid, 
+                        int ctrl_type, int ctrl_value)
+{
+    DECLARE_SYSCTL;
+
+    if ( xc_handle < 0 )
+        return -EINVAL;
+
+    sysctl.cmd = XEN_SYSCTL_pm_op;
+    sysctl.u.pm_op.cmd = SET_CPUFREQ_PARA;
+    sysctl.u.pm_op.cpuid = cpuid;
+    sysctl.u.pm_op.set_para.ctrl_type = ctrl_type;
+    sysctl.u.pm_op.set_para.ctrl_value = ctrl_value;
+
+    return xc_sysctl(xc_handle, &sysctl);
+}
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_private.h  Wed Dec 24 12:52:34 2008 +0900
@@ -42,6 +42,9 @@
 #define DEBUG    1
 #define INFO     1
 #define PROGRESS 0
+
+/* Force a compilation error if condition is true */
+#define XC_BUILD_BUG_ON(p) ((void)sizeof(struct { int:-!!(p); }))
 
 /*
 ** Define max dirty page cache to permit during save/restore -- need to balance 
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_ptrace.c   Wed Dec 24 12:52:34 2008 +0900
@@ -524,10 +524,20 @@ xc_ptrace(
         /*  XXX we can still have problems if the user switches threads
          *  during single-stepping - but that just seems retarded
          */
-        ctxt[cpu].c.user_regs.eflags |= PSL_T;
-        if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu,
-                                &ctxt[cpu])))
-            goto out_error_domctl;
+        /* Try to enable Monitor Trap Flag for HVM, and fall back to TF
+         * if no MTF support
+         */
+        if ( !current_is_hvm ||
+             xc_domain_debug_control(xc_handle,
+                                     current_domid,
+                                     XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON,
+                                     cpu) )
+        {
+            ctxt[cpu].c.user_regs.eflags |= PSL_T;
+            if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu,
+                                    &ctxt[cpu])))
+                goto out_error_domctl;
+        }
         /* FALLTHROUGH */
 
     case PTRACE_CONT:
@@ -538,15 +548,22 @@ xc_ptrace(
         {
             FOREACH_CPU(cpumap, index) {
                 cpu = index - 1;
-                if (fetch_regs(xc_handle, cpu, NULL))
-                    goto out_error;
-                /* Clear trace flag */
-                if ( ctxt[cpu].c.user_regs.eflags & PSL_T )
+                if ( !current_is_hvm ||
+                      xc_domain_debug_control(xc_handle,
+                                              current_domid,
+                                              XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF,
+                                              cpu) )
                 {
-                    ctxt[cpu].c.user_regs.eflags &= ~PSL_T;
-                    if ((retval = xc_vcpu_setcontext(xc_handle, current_domid,
-                                                cpu, &ctxt[cpu])))
-                        goto out_error_domctl;
+                    if (fetch_regs(xc_handle, cpu, NULL))
+                        goto out_error;
+                    /* Clear trace flag */
+                    if ( ctxt[cpu].c.user_regs.eflags & PSL_T )
+                    {
+                        ctxt[cpu].c.user_regs.eflags &= ~PSL_T;
+                        if ((retval = xc_vcpu_setcontext(xc_handle, current_domid,
+                                        cpu, &ctxt[cpu])))
+                            goto out_error_domctl;
+                    }
                 }
             }
         }
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xenctrl.h     Wed Dec 24 12:52:34 2008 +0900
@@ -1111,6 +1111,12 @@ int xc_domain_set_target(int xc_handle,
                          uint32_t domid,
                          uint32_t target);
 
+/* Control the domain for debug */
+int xc_domain_debug_control(int xc_handle,
+                            uint32_t domid,
+                            uint32_t sop,
+                            uint32_t vcpu);
+
 #if defined(__i386__) || defined(__x86_64__)
 int xc_cpuid_check(int xc,
                    const unsigned int *input,
@@ -1161,4 +1167,46 @@ int xc_pm_reset_cxstat(int xc_handle, in
 
 int xc_cpu_online(int xc_handle, int cpu);
 int xc_cpu_offline(int xc_handle, int cpu);
+
+/* 
+ * cpufreq para name of this structure named 
+ * same as sysfs file name of native linux
+ */
+typedef xen_userspace_t xc_userspace_t;
+typedef xen_ondemand_t xc_ondemand_t;
+
+struct xc_get_cpufreq_para {
+    /* IN/OUT variable */
+    uint32_t cpu_num;
+    uint32_t freq_num;
+    uint32_t gov_num;
+
+    /* for all governors */
+    /* OUT variable */
+    uint32_t *affected_cpus;
+    uint32_t *scaling_available_frequencies;
+    char     *scaling_available_governors;
+    char scaling_driver[CPUFREQ_NAME_LEN];
+
+    uint32_t cpuinfo_cur_freq;
+    uint32_t cpuinfo_max_freq;
+    uint32_t cpuinfo_min_freq;
+    uint32_t scaling_cur_freq;
+
+    char scaling_governor[CPUFREQ_NAME_LEN];
+    uint32_t scaling_max_freq;
+    uint32_t scaling_min_freq;
+
+    /* for specific governor */
+    union {
+        xc_userspace_t userspace;
+        xc_ondemand_t ondemand;
+    } u;
+};
+
+int xc_get_cpufreq_para(int xc_handle, int cpuid,
+                        struct xc_get_cpufreq_para *user_para);
+int xc_set_cpufreq_gov(int xc_handle, int cpuid, char *govname);
+int xc_set_cpufreq_para(int xc_handle, int cpuid,
+                        int ctrl_type, int ctrl_value);
 #endif /* XENCTRL_H */
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xg_private.c  Wed Dec 24 12:52:34 2008 +0900
@@ -131,6 +131,7 @@ char *xc_inflate_buffer(const char *in_b
 
     /* Inflate in one pass/call */
     sts = inflate(&zStream, Z_FINISH);
+    inflateEnd(&zStream);
     if ( sts != Z_STREAM_END )
     {
         ERROR("inflate failed, sts %d\n", sts);
diff -r 9837303a4708 -r 07f26e047fbf tools/misc/xen-detect.c
--- a/tools/misc/xen-detect.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/misc/xen-detect.c   Wed Dec 24 12:52:34 2008 +0900
@@ -50,17 +50,25 @@ static int check_for_xen(void)
 {
     uint32_t eax, ebx, ecx, edx;
     char signature[13];
+    uint32_t base;
 
-    cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
-    *(uint32_t *)(signature + 0) = ebx;
-    *(uint32_t *)(signature + 4) = ecx;
-    *(uint32_t *)(signature + 8) = edx;
-    signature[12] = '\0';
+    for ( base = 0x40000000; base < 0x40001000; base += 0x100 )
+    {
+        cpuid(base, &eax, &ebx, &ecx, &edx);
 
-    if ( strcmp("XenVMMXenVMM", signature) || (eax < 0x40000002) )
-        return 0;
+        *(uint32_t *)(signature + 0) = ebx;
+        *(uint32_t *)(signature + 4) = ecx;
+        *(uint32_t *)(signature + 8) = edx;
+        signature[12] = '\0';
 
-    cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
+        if ( !strcmp("XenVMMXenVMM", signature) && (eax >= (base + 2)) )
+            goto found;
+    }
+
+    return 0;
+
+ found:
+    cpuid(base + 1, &eax, &ebx, &ecx, &edx);
     printf("Running in %s context on Xen v%d.%d.\n",
            pv_context ? "PV" : "HVM", (uint16_t)(eax >> 16), (uint16_t)eax);
     return 1;
diff -r 9837303a4708 -r 07f26e047fbf tools/misc/xenpm.c
--- a/tools/misc/xenpm.c        Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/misc/xenpm.c        Wed Dec 24 12:52:34 2008 +0900
@@ -16,199 +16,591 @@
  * Place - Suite 330, Boston, MA 02111-1307 USA.
  */
 
+/* to eliminate warning on `strndup' */
+#define _GNU_SOURCE
+
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <getopt.h>
 #include <errno.h>
 
 #include <xenctrl.h>
 #include <inttypes.h>
 
-int main(int argc, char **argv)
-{
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+/* help message */
+void show_help(void)
+{
+    fprintf(stderr,
+            "Usage:\n"
+            "       xenpm get-cpuidle-states [cpuid]: list cpu idle information on CPU cpuid or all CPUs.\n"
+            "       xenpm get-cpufreq-states [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
+            "       xenpm get-cpufreq-para [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
+            "       xenpm set-scaling-maxfreq <cpuid> <HZ>: set max cpu frequency <HZ> on CPU <cpuid>.\n"
+            "       xenpm set-scaling-minfreq <cpuid> <HZ>: set min cpu frequency <HZ> on CPU <cpuid>.\n"
+            "       xenpm set-scaling-governor <cpuid> <name>: set scaling governor on CPU <cpuid>.\n"
+            "       xenpm set-scaling-speed <cpuid> <num>: set scaling speed on CPU <cpuid>.\n"
+            "       xenpm set-sampling-rate <cpuid> <num>: set sampling rate on CPU <cpuid>.\n"
+            "       xenpm set-up-threshold <cpuid> <num>: set up threshold on CPU <cpuid>.\n");
+}
+
+/* wrapper function */
+int help_func(int xc_fd, int cpuid, uint32_t value)
+{
+    show_help();
+    return 0;
+}
+
+/* show cpu idle information on CPU cpuid */
+static int show_cx_cpuid(int xc_fd, int cpuid)
+{
+    int i, ret = 0;
+    int max_cx_num = 0;
+    struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
+
+    ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
+    if ( ret )
+    {
+        if ( errno == ENODEV )
+        {
+            fprintf(stderr, "Xen cpuidle is not enabled!\n");
+            return -ENODEV;
+        }
+        else
+        {
+            fprintf(stderr, "[CPU%d] failed to get max C-state\n", cpuid);
+            return -EINVAL;
+        }
+    }
+
+    cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
+    if ( !cxstat->triggers )
+    {
+        fprintf(stderr, "[CPU%d] failed to malloc for C-states triggers\n", cpuid);
+        return -ENOMEM;
+    }
+    cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
+    if ( !cxstat->residencies )
+    {
+        fprintf(stderr, "[CPU%d] failed to malloc for C-states residencies\n", cpuid);
+        free(cxstat->triggers);
+        return -ENOMEM;
+    }
+
+    ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
+    if( ret )
+    {
+        fprintf(stderr, "[CPU%d] failed to get C-states statistics "
+                "information\n", cpuid);
+        free(cxstat->triggers);
+        free(cxstat->residencies);
+        return -EINVAL;
+    }
+
+    printf("cpu id               : %d\n", cpuid);
+    printf("total C-states       : %d\n", cxstat->nr);
+    printf("idle time(ms)        : %"PRIu64"\n",
+           cxstat->idle_time/1000000UL);
+    for ( i = 0; i < cxstat->nr; i++ )
+    {
+        printf("C%d                   : transition [%020"PRIu64"]\n",
+               i, cxstat->triggers[i]);
+        printf("                       residency  [%020"PRIu64" ms]\n",
+               cxstat->residencies[i]/1000000UL);
+    }
+
+    free(cxstat->triggers);
+    free(cxstat->residencies);
+
+    printf("\n");
+    return 0;
+}
+
+int cxstates_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int ret = 0;
+    xc_physinfo_t physinfo = { 0 };
+
+    if ( cpuid < 0 )
+    {
+        /* show cxstates on all cpu */
+        ret = xc_physinfo(xc_fd, &physinfo);
+        if ( ret )
+        {
+            fprintf(stderr, "failed to get the processor information\n");
+        }
+        else
+        {
+            int i;
+            for ( i = 0; i < physinfo.nr_cpus; i++ )
+            {
+                if ( (ret = show_cx_cpuid(xc_fd, i)) == -ENODEV )
+                    break;
+            }
+        }
+    }
+    else
+        ret = show_cx_cpuid(xc_fd, cpuid);
+
+    return ret;
+}
+
+/* show cpu frequency information on CPU cpuid */
+static int show_px_cpuid(int xc_fd, int cpuid)
+{
+    int i, ret = 0;
+    int max_px_num = 0;
+    struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
+
+    ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
+    if ( ret )
+    {
+        if ( errno == ENODEV )
+        {
+            printf("Xen cpufreq is not enabled!\n");
+            return -ENODEV;
+        }
+        else
+        {
+            fprintf(stderr, "[CPU%d] failed to get max P-state\n", cpuid);
+            return -EINVAL;
+        }
+    }
+
+    pxstat->trans_pt = malloc(max_px_num * max_px_num *
+                              sizeof(uint64_t));
+    if ( !pxstat->trans_pt )
+    {
+        fprintf(stderr, "[CPU%d] failed to malloc for P-states transition table\n", cpuid);
+        return -ENOMEM;
+    }
+    pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
+    if ( !pxstat->pt )
+    {
+        fprintf(stderr, "[CPU%d] failed to malloc for P-states table\n", cpuid);
+        free(pxstat->trans_pt);
+        return -ENOMEM;
+    }
+
+    ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
+    if( ret )
+    {
+        fprintf(stderr, "[CPU%d] failed to get P-states statistics information\n", cpuid);
+        free(pxstat->trans_pt);
+        free(pxstat->pt);
+        return -ENOMEM;
+    }
+
+    printf("cpu id               : %d\n", cpuid);
+    printf("total P-states       : %d\n", pxstat->total);
+    printf("usable P-states      : %d\n", pxstat->usable);
+    printf("current frequency    : %"PRIu64" MHz\n",
+           pxstat->pt[pxstat->cur].freq);
+    for ( i = 0; i < pxstat->total; i++ )
+    {
+        if ( pxstat->cur == i )
+            printf("*P%d", i);
+        else
+            printf("P%d ", i);
+        printf("                  : freq       [%04"PRIu64" MHz]\n",
+               pxstat->pt[i].freq);
+        printf("                       transition [%020"PRIu64"]\n",
+               pxstat->pt[i].count);
+        printf("                       residency  [%020"PRIu64" ms]\n",
+               pxstat->pt[i].residency/1000000UL);
+    }
+
+    free(pxstat->trans_pt);
+    free(pxstat->pt);
+
+    printf("\n");
+    return 0;
+}
+
+int pxstates_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int ret = 0;
+    xc_physinfo_t physinfo = { 0 };
+
+    if ( cpuid < 0 )
+    {
+        ret = xc_physinfo(xc_fd, &physinfo);
+        if ( ret )
+        {
+            fprintf(stderr, "failed to get the processor information\n");
+        }
+        else
+        {
+            int i;
+            for ( i = 0; i < physinfo.nr_cpus; i++ )
+            {
+                if ( (ret = show_px_cpuid(xc_fd, i)) == -ENODEV )
+                    break;
+            }
+        }
+    }
+    else
+        ret = show_px_cpuid(xc_fd, cpuid);
+
+    return ret;
+}
+
+/* print out parameters about cpu frequency */
+static void print_cpufreq_para(int cpuid, struct xc_get_cpufreq_para *p_cpufreq)
+{
+    int i;
+
+    printf("cpu id               : %d\n", cpuid);
+
+    printf("affected_cpus        :");
+    for ( i = 0; i < p_cpufreq->cpu_num; i++ )
+        if ( i == cpuid )
+            printf(" *%d", p_cpufreq->affected_cpus[i]);
+        else
+            printf(" %d", p_cpufreq->affected_cpus[i]);
+    printf("\n");
+
+    printf("cpuinfo frequency    : max [%u] min [%u] cur [%u]\n",
+           p_cpufreq->cpuinfo_max_freq,
+           p_cpufreq->cpuinfo_min_freq,
+           p_cpufreq->cpuinfo_cur_freq);
+
+    printf("scaling_driver       : %s\n", p_cpufreq->scaling_driver);
+
+    printf("scaling_avail_gov    : %s\n",
+           p_cpufreq->scaling_available_governors);
+
+    printf("current_governor     : %s\n", p_cpufreq->scaling_governor);
+    if ( !strncmp(p_cpufreq->scaling_governor,
+                  "userspace", CPUFREQ_NAME_LEN) )
+    {
+        printf("  userspace specific :\n");
+        printf("    scaling_setspeed : %u\n",
+               p_cpufreq->u.userspace.scaling_setspeed);
+    }
+    else if ( !strncmp(p_cpufreq->scaling_governor,
+                       "ondemand", CPUFREQ_NAME_LEN) )
+    {
+        printf("  ondemand specific  :\n");
+        printf("    sampling_rate    : max [%u] min [%u] cur [%u]\n",
+               p_cpufreq->u.ondemand.sampling_rate_max,
+               p_cpufreq->u.ondemand.sampling_rate_min,
+               p_cpufreq->u.ondemand.sampling_rate);
+        printf("    up_threshold     : %u\n",
+               p_cpufreq->u.ondemand.up_threshold);
+    }
+
+    printf("scaling_avail_freq   :");
+    for ( i = 0; i < p_cpufreq->freq_num; i++ )
+        if ( p_cpufreq->scaling_available_frequencies[i] == p_cpufreq->scaling_cur_freq )
+            printf(" *%d", p_cpufreq->scaling_available_frequencies[i]);
+        else
+            printf(" %d", p_cpufreq->scaling_available_frequencies[i]);
+    printf("\n");
+
+    printf("scaling frequency    : max [%u] min [%u] cur [%u]\n",
+           p_cpufreq->scaling_max_freq,
+           p_cpufreq->scaling_min_freq,
+           p_cpufreq->scaling_cur_freq);
+    printf("\n");
+}
+
+/* show cpu frequency parameters information on CPU cpuid */
+static int show_cpufreq_para_cpuid(int xc_fd, int cpuid)
+{
+    int ret = 0;
+    struct xc_get_cpufreq_para cpufreq_para, *p_cpufreq = &cpufreq_para;
+
+    p_cpufreq->cpu_num = 0;
+    p_cpufreq->freq_num = 0;
+    p_cpufreq->gov_num = 0;
+    p_cpufreq->affected_cpus = NULL;
+    p_cpufreq->scaling_available_frequencies = NULL;
+    p_cpufreq->scaling_available_governors = NULL;
+
+    do
+    {
+        free(p_cpufreq->affected_cpus);
+        free(p_cpufreq->scaling_available_frequencies);
+        free(p_cpufreq->scaling_available_governors);
+
+        p_cpufreq->affected_cpus = NULL;
+        p_cpufreq->scaling_available_frequencies = NULL;
+        p_cpufreq->scaling_available_governors = NULL;
+
+        if (!(p_cpufreq->affected_cpus =
+              malloc(p_cpufreq->cpu_num * sizeof(uint32_t))))
+        {
+            fprintf(stderr,
+                    "[CPU%d] failed to malloc for affected_cpus\n",
+                    cpuid);
+            ret = -ENOMEM;
+            goto out;
+        }
+        if (!(p_cpufreq->scaling_available_frequencies =
+              malloc(p_cpufreq->freq_num * sizeof(uint32_t))))
+        {
+            fprintf(stderr,
+                    "[CPU%d] failed to malloc for scaling_available_frequencies\n",
+                    cpuid);
+            ret = -ENOMEM;
+            goto out;
+        }
+        if (!(p_cpufreq->scaling_available_governors =
+              malloc(p_cpufreq->gov_num * CPUFREQ_NAME_LEN * sizeof(char))))
+        {
+            fprintf(stderr,
+                    "[CPU%d] failed to malloc for scaling_available_governors\n",
+                    cpuid);
+            ret = -ENOMEM;
+            goto out;
+        }
+
+        ret = xc_get_cpufreq_para(xc_fd, cpuid, p_cpufreq);
+    } while ( ret && errno == EAGAIN );
+
+    if ( ret == 0 )
+        print_cpufreq_para(cpuid, p_cpufreq);
+    else if ( errno == ENODEV )
+    {
+        ret = -ENODEV;
+        fprintf(stderr, "Xen cpufreq is not enabled!\n");
+    }
+    else
+        fprintf(stderr,
+                "[CPU%d] failed to get cpufreq parameter\n",
+                cpuid);
+
+out:
+    free(p_cpufreq->scaling_available_governors);
+    free(p_cpufreq->scaling_available_frequencies);
+    free(p_cpufreq->affected_cpus);
+
+    return ret;
+}
+
+int cpufreq_para_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int ret = 0;
+    xc_physinfo_t physinfo = { 0 };
+
+    if ( cpuid < 0 )
+    {
+        ret = xc_physinfo(xc_fd, &physinfo);
+        if ( ret )
+        {
+            fprintf(stderr, "failed to get the processor information\n");
+        }
+        else
+        {
+            int i;
+            for ( i = 0; i < physinfo.nr_cpus; i++ )
+            {
+                if ( (ret = show_cpufreq_para_cpuid(xc_fd, i)) == -ENODEV )
+                    break;
+            }
+        }
+    }
+    else
+        ret = show_cpufreq_para_cpuid(xc_fd, cpuid);
+
+    return ret;
+}
+
+int scaling_max_freq_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int ret = 0;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, value);
+    if ( ret )
+    {
+        fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", cpuid);
+    }
+
+    return ret;
+}
+
+int scaling_min_freq_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int ret;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, value);
+    if ( ret )
+    {
+        fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", cpuid);
+    }
+
+    return ret;
+}
+
+int scaling_speed_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int ret;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, value);
+    if ( ret )
+    {
+        fprintf(stderr, "[CPU%d] failed to set scaling speed\n", cpuid);
+    }
+
+    return ret;
+}
+
+int scaling_sampling_rate_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int ret;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    ret = xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, value);
+    if ( ret ) 
+    {
+        fprintf(stderr, "[CPU%d] failed to set scaling sampling rate\n", cpuid);
+    }
+
+    return ret;
+}
+
+int scaling_up_threshold_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int ret;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    ret = xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, value);
+    if ( ret )
+    {
+        fprintf(stderr, "[CPU%d] failed to set scaling threshold\n", cpuid);
+    }
+
+    return ret;
+}
+
+int scaling_governor_func(int xc_fd, int cpuid, char *name)
+{
+    int ret = 0;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    ret = xc_set_cpufreq_gov(xc_fd, cpuid, name);
+    if ( ret )
+    {
+        fprintf(stderr, "failed to set cpufreq governor to %s\n", name);
+    }
+
+    return ret;
+}
+
+struct {
+    const char *name;
+    int (*function)(int xc_fd, int cpuid, uint32_t value);
+} main_options[] = {
+    { "help", help_func },
+    { "get-cpuidle-states", cxstates_func },
+    { "get-cpufreq-states", pxstates_func },
+    { "get-cpufreq-para", cpufreq_para_func },
+    { "set-scaling-maxfreq", scaling_max_freq_func },
+    { "set-scaling-minfreq", scaling_min_freq_func },
+    { "set-scaling-governor", NULL },
+    { "set-scaling-speed", scaling_speed_func },
+    { "set-sampling-rate", scaling_sampling_rate_func },
+    { "set-up-threshold", scaling_up_threshold_func },
+};
+
+int main(int argc, char *argv[])
+{
+    int i, ret = -EINVAL;
     int xc_fd;
-    int i, j, ret = 0;
-    int cinfo = 0, pinfo = 0;
-    int ch;
-    xc_physinfo_t physinfo = { 0 };
-
-    while ( (ch = getopt(argc, argv, "cp")) != -1 )
-    {
-        switch ( ch )
-        {
-        case 'c':
-            cinfo = 1;
-            break;
-        case 'p':
-            pinfo = 1;
-            break;
-        default:
-            fprintf(stderr, "%s [-p] [-c]\n", argv[0]);
-            return -1;
-        }
-    }
-
-    if ( !cinfo && !pinfo )
-    {
-        cinfo = 1;
-        pinfo = 1;
+    int cpuid = -1;
+    uint32_t value = 0;
+    int nr_matches = 0;
+    int matches_main_options[ARRAY_SIZE(main_options)];
+
+    if ( argc < 2 )
+    {
+        show_help();
+        return ret;
+    }
+
+    if ( argc > 2 )
+    {
+        if ( sscanf(argv[2], "%d", &cpuid) != 1 )
+            cpuid = -1;
     }
 
     xc_fd = xc_interface_open();
     if ( xc_fd < 0 )
     {
         fprintf(stderr, "failed to get the handler\n");
-        return xc_fd;
-    }
-
-    ret = xc_physinfo(xc_fd, &physinfo);
-    if ( ret )
-    {
-        fprintf(stderr, "failed to get the processor information\n");
-        xc_interface_close(xc_fd);
-        return ret;
-    }
-
-    /* print out the C state information */
-    if ( cinfo )
-    {
-        int max_cx_num = 0;
-        struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
-
-        for ( i = 0; i < physinfo.nr_cpus; i++ )
-        {
-            ret = xc_pm_get_max_cx(xc_fd, i, &max_cx_num);
-            if ( ret )
+    }
+
+    for ( i = 0; i < ARRAY_SIZE(main_options); i++ )
+    {
+        if ( !strncmp(main_options[i].name, argv[1], strlen(argv[1])) )
+        {
+            matches_main_options[nr_matches++] = i;
+        }
+    }
+
+    if ( nr_matches > 1 )
+    {
+        fprintf(stderr, "Ambigious options: ");
+        for ( i = 0; i < nr_matches; i++ )
+            fprintf(stderr, " %s", main_options[matches_main_options[i]].name);
+        fprintf(stderr, "\n");
+    }
+    else if ( nr_matches == 1 )
+    {
+        if ( !strcmp("set-scaling-governor", main_options[matches_main_options[0]].name) )
+        {
+            char *name = strdup(argv[3]);
+            ret = scaling_governor_func(xc_fd, cpuid, name);
+            free(name);
+        }
+        else
+        {
+            if ( argc > 3 )
             {
-                if ( errno == ENODEV )
-                {
-                    fprintf(stderr, "Xen cpuidle is not enabled!\n");
-                    break;
-                }
-                else
-                {
-                    fprintf(stderr, "[CPU%d] failed to get max C-state\n", i);
-                    continue;
-                }
+                if ( sscanf(argv[3], "%d", &value) != 1 )
+                    value = 0;
             }
-
-            cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
-            if ( !cxstat->triggers )
-            {
-                fprintf(stderr, "failed to malloc for C-states triggers\n");
-                break;
-            }
-            cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
-            if ( !cxstat->residencies )
-            {
-                fprintf(stderr, "failed to malloc for C-states residencies\n");
-                free(cxstat->triggers);
-                break;
-            }
-
-            ret = xc_pm_get_cxstat(xc_fd, i, cxstat);
-            if( ret )
-            {
-                fprintf(stderr, "[CPU%d] failed to get C-states statistics "
-                        "information\n", i);
-                free(cxstat->triggers);
-                free(cxstat->residencies);
-                continue;
-            }
-
-            printf("cpu id               : %d\n", i);
-            printf("total C-states       : %d\n", cxstat->nr);
-            printf("idle time(ms)        : %"PRIu64"\n",
-                   cxstat->idle_time/1000000UL);
-            for ( j = 0; j < cxstat->nr; j++ )
-            {
-                printf("C%d                   : transition [%020"PRIu64"]\n",
-                       j, cxstat->triggers[j]);
-                printf("                       residency  [%020"PRIu64" ms]\n",
-                       cxstat->residencies[j]*1000000UL/3579/1000000UL);
-            }
-
-            free(cxstat->triggers);
-            free(cxstat->residencies);
-
-            printf("\n");
-        }
-    }
-
-    /* print out P state information */
-    if ( pinfo )
-    {
-        int max_px_num = 0;
-        struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
-
-        for ( i = 0; i < physinfo.nr_cpus; i++ )
-        {
-            ret = xc_pm_get_max_px(xc_fd, i, &max_px_num);
-            if ( ret )
-            {
-                if ( errno == ENODEV )
-                {
-                    printf("Xen cpufreq is not enabled!\n");
-                    break;
-                }
-                else
-                {
-                    fprintf(stderr, "[CPU%d] failed to get max P-state\n", i);
-                    continue;
-                }
-            }
-
-            pxstat->trans_pt = malloc(max_px_num * max_px_num *
-                                      sizeof(uint64_t));
-            if ( !pxstat->trans_pt )
-            {
-                fprintf(stderr, "failed to malloc for P-states "
-                        "transition table\n");
-                break;
-            }
-            pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
-            if ( !pxstat->pt )
-            {
-                fprintf(stderr, "failed to malloc for P-states table\n");
-                free(pxstat->trans_pt);
-                break;
-            }
-
-            ret = xc_pm_get_pxstat(xc_fd, i, pxstat);
-            if( ret )
-            {
-                fprintf(stderr, "[CPU%d] failed to get P-states "
-                        "statistics information\n", i);
-                free(pxstat->trans_pt);
-                free(pxstat->pt);
-                continue;
-            }
-
-            printf("cpu id               : %d\n", i);
-            printf("total P-states       : %d\n", pxstat->total);
-            printf("usable P-states      : %d\n", pxstat->usable);
-            printf("current frequency    : %"PRIu64" MHz\n",
-                   pxstat->pt[pxstat->cur].freq);
-            for ( j = 0; j < pxstat->total; j++ )
-            {
-                if ( pxstat->cur == j )
-                    printf("*P%d", j);
-                else
-                    printf("P%d ", j);
-                printf("                  : freq       [%04"PRIu64" MHz]\n",
-                       pxstat->pt[j].freq);
-                printf("                       transition [%020"PRIu64"]\n",
-                       pxstat->pt[j].count);
-                printf("                       residency  [%020"PRIu64" ms]\n",
-                       pxstat->pt[j].residency/1000000UL);
-            }
-
-            free(pxstat->trans_pt);
-            free(pxstat->pt);
-
-            printf("\n");
-        }
-    }
+            ret = main_options[matches_main_options[0]].function(xc_fd, cpuid, value);
+        }
+    }
+    else
+        show_help();
 
     xc_interface_close(xc_fd);
     return ret;
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/lowlevel/acm/acm.c
--- a/tools/python/xen/lowlevel/acm/acm.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/lowlevel/acm/acm.c       Wed Dec 24 12:52:34 2008 +0900
@@ -68,6 +68,8 @@ static void *__getssid(int domid, uint32
         goto out2;
     } else {
         *buflen = SSID_BUFFER_SIZE;
+        free(buf);
+        buf = NULL;
         goto out2;
     }
  out2:
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/lowlevel/flask/flask.c
--- a/tools/python/xen/lowlevel/flask/flask.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/lowlevel/flask/flask.c   Wed Dec 24 12:52:34 2008 +0900
@@ -55,6 +55,7 @@ static PyObject *pyflask_context_to_sid(
     xc_handle = xc_interface_open();
     if (xc_handle < 0) {
         errno = xc_handle;
+        free(buf);
         return PyErr_SetFromErrno(xc_error_obj);
     }
     
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Dec 24 12:52:34 2008 +0900
@@ -678,19 +678,22 @@ static PyObject *pyxc_get_device_group(X
 
     if ( rc < 0 )
     {
-      free(sdev_array); 
-      return pyxc_error_to_exception();
+        free(sdev_array); 
+        return pyxc_error_to_exception();
     }
 
     if ( !num_sdevs )
     {
-       free(sdev_array);
-       return Py_BuildValue("s", "");
+        free(sdev_array);
+        return Py_BuildValue("s", "");
     }
 
     group_str = calloc(num_sdevs, sizeof(dev_str));
     if (group_str == NULL)
+    {
+        free(sdev_array);
         return PyErr_NoMemory();
+    }
 
     for ( i = 0; i < num_sdevs; i++ )
     {
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/lowlevel/xs/xs.c Wed Dec 24 12:52:34 2008 +0900
@@ -336,15 +336,19 @@ static PyObject *xspy_set_permissions(Xs
        xs_set_error(EINVAL);
         goto exit;
     }
+
     xsperms_n = PyList_Size(perms);
-    xsperms = calloc(xsperms_n, sizeof(struct xs_permissions));
+    /* NB. alloc +1 so we can change the owner if necessary. */
+    xsperms = calloc(xsperms_n + 1, sizeof(struct xs_permissions));
     if (!xsperms) {
        xs_set_error(ENOMEM);
         goto exit;
     }
+
     tuple0 = PyTuple_New(0);
     if (!tuple0)
         goto exit;
+
     for (i = 0; i < xsperms_n; i++) {
         /* Read/write perms. Set these. */
         int p_read = 0, p_write = 0;
@@ -357,6 +361,17 @@ static PyObject *xspy_set_permissions(Xs
         if (p_write)
             xsperms[i].perms |= XS_PERM_WRITE;
     }
+
+    /*
+     * Is the caller trying to restrict access to the first specified
+     * domain? If so then it cannot be owner, so we force dom0 as owner.
+     */
+    if (xsperms_n && xsperms[0].perms && xsperms[0].id) {
+        memmove(&xsperms[1], &xsperms[0], xsperms_n * sizeof(*xsperms));
+        xsperms[0].id = xsperms[0].perms = 0;
+        xsperms_n++;
+    }
+
     Py_BEGIN_ALLOW_THREADS
     result = xs_set_permissions(xh, th, path, xsperms, xsperms_n);
     Py_END_ALLOW_THREADS
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/XendCheckpoint.py   Wed Dec 24 12:52:34 2008 +0900
@@ -253,7 +253,7 @@ def restore(xd, fd, dominfo = None, paus
         # set memory limit
         xc.domain_setmaxmem(dominfo.getDomid(), maxmem)
 
-        balloon.free(memory + shadow)
+        balloon.free(memory + shadow, dominfo)
 
         shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024)
         dominfo.info['shadow_memory'] = shadow_cur
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py       Wed Dec 24 12:52:34 2008 +0900
@@ -1289,7 +1289,6 @@ class XendConfig(dict):
                     pass
 
             if dev_type == 'vbd':
-                dev_info['bootable'] = 0
                 if dev_info.get('dev', '').startswith('ioemu:'):
                     dev_info['driver'] = 'ioemu'
                 else:
@@ -1325,7 +1324,7 @@ class XendConfig(dict):
                 if param not in target:
                     target[param] = []
                 if dev_uuid not in target[param]:
-                    if dev_type == 'vbd':
+                    if dev_type == 'vbd' and 'bootable' not in dev_info:
                         # Compat hack -- mark first disk bootable
                         dev_info['bootable'] = int(not target[param])
                     target[param].append(dev_uuid)
@@ -1333,8 +1332,9 @@ class XendConfig(dict):
                 if 'vbd_refs' not in target:
                     target['vbd_refs'] = []
                 if dev_uuid not in target['vbd_refs']:
-                    # Compat hack -- mark first disk bootable
-                    dev_info['bootable'] = int(not target['vbd_refs'])
+                    if 'bootable' not in dev_info:
+                        # Compat hack -- mark first disk bootable
+                        dev_info['bootable'] = int(not target['vbd_refs'])
                     target['vbd_refs'].append(dev_uuid)
                     
             elif dev_type == 'vfb':
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Dec 24 12:52:34 2008 +0900
@@ -517,7 +517,8 @@ class XendDomainInfo:
         # HVM domain shuts itself down only if it has PV drivers
         if self.info.is_hvm():
             hvm_pvdrv = xc.hvm_get_param(self.domid, HVM_PARAM_CALLBACK_IRQ)
-            if not hvm_pvdrv:
+            hvm_s_state = xc.hvm_get_param(self.domid, HVM_PARAM_ACPI_S_STATE)
+            if not hvm_pvdrv or hvm_s_state != 0:
                 code = REVERSE_DOMAIN_SHUTDOWN_REASONS[reason]
                 log.info("HVM save:remote shutdown dom %d!", self.domid)
                 xc.domain_shutdown(self.domid, code)
@@ -2104,7 +2105,7 @@ class XendDomainInfo:
         # overhead is greater for some types of domain than others. For
         # example, an x86 HVM domain will have a default shadow-pagetable
         # allocation of 1MB. We free up 2MB here to be on the safe side.
-        balloon.free(2*1024) # 2MB should be plenty
+        balloon.free(2*1024, self) # 2MB should be plenty
 
         ssidref = 0
         if security.on() == xsconstants.XS_POLICY_USE:
@@ -2298,7 +2299,7 @@ class XendDomainInfo:
             vtd_mem = ((vtd_mem + 1023) / 1024) * 1024
 
             # Make sure there's enough RAM available for the domain
-            balloon.free(memory + shadow + vtd_mem)
+            balloon.free(memory + shadow + vtd_mem, self)
 
             # Set up the shadow memory
             shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
@@ -2715,7 +2716,7 @@ class XendDomainInfo:
             # The domain might already have some shadow memory
             overhead_kb -= xc.shadow_mem_control(self.domid) * 1024
         if overhead_kb > 0:
-            balloon.free(overhead_kb)
+            balloon.free(overhead_kb, self)
 
     def _unwatchVm(self):
         """Remove the watch on the VM path, if any.  Idempotent.  Nothrow
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/balloon.py  Wed Dec 24 12:52:34 2008 +0900
@@ -67,7 +67,7 @@ def get_dom0_target_alloc():
         raise VmError('Failed to query target memory allocation of dom0.')
     return kb
 
-def free(need_mem):
+def free(need_mem ,self):
     """Balloon out memory from the privileged domain so that there is the
     specified required amount (in KiB) free.
     """
@@ -121,6 +121,40 @@ def free(need_mem):
             max_free_mem = total_mem - dom0_alloc
         if need_mem >= max_free_mem:
             retries = rlimit
+
+        # Check whether the current machine is a NUMA system and the newly
+        # created HVM guest has all its vcpus in the same node. If both
+        # conditions hold, we wait until all the pages in the scrub list
+        # are freed (if the waiting time goes beyond 20s, we stop
+        # waiting for it).
+        if physinfo['nr_nodes'] > 1 and retries == 0:
+            oldnode = -1
+            waitscrub = 1
+            vcpus = self.info['cpus'][0]
+            for vcpu in vcpus:
+                nodenum = 0
+                for node in physinfo['node_to_cpu']:
+                    for cpu in node:
+                        if vcpu == cpu:
+                            if oldnode == -1:
+                                oldnode = nodenum
+                            elif oldnode != nodenum:
+                                waitscrub = 0
+                    nodenum = nodenum + 1
+
+            if waitscrub == 1 and scrub_mem > 0:
+                log.debug("wait for scrub %s", scrub_mem)
+                while scrub_mem > 0 and retries < rlimit:
+                    time.sleep(sleep_time)
+                    physinfo = xc.physinfo()
+                    free_mem = physinfo['free_memory']
+                    scrub_mem = physinfo['scrub_memory']
+                    retries += 1
+                    sleep_time += SLEEP_TIME_GROWTH
+                log.debug("scrub for %d times", retries)
+
+            retries = 0
+            sleep_time = SLEEP_TIME_GROWTH
 
         while retries < rlimit:
             physinfo = xc.physinfo()
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/server/blkif.py     Wed Dec 24 12:52:34 2008 +0900
@@ -78,6 +78,10 @@ class BlkifController(DevController):
         if uuid:
             back['uuid'] = uuid
 
+        bootable = config.get('bootable', None)
+        if bootable != None:
+            back['bootable'] = str(bootable)
+
         if security.on() == xsconstants.XS_POLICY_USE:
             self.do_access_control(config, uname)
 
@@ -143,11 +147,12 @@ class BlkifController(DevController):
         config = DevController.getDeviceConfiguration(self, devid, transaction)
         if transaction is None:
             devinfo = self.readBackend(devid, 'dev', 'type', 'params', 'mode',
-                                       'uuid')
+                                       'uuid', 'bootable')
         else:
             devinfo = self.readBackendTxn(transaction, devid,
-                                          'dev', 'type', 'params', 'mode', 'uuid')
-        dev, typ, params, mode, uuid = devinfo
+                                          'dev', 'type', 'params', 'mode', 'uuid',
+                                          'bootable')
+        dev, typ, params, mode, uuid, bootable = devinfo
         
         if dev:
             if transaction is None:
@@ -165,6 +170,8 @@ class BlkifController(DevController):
             config['mode'] = mode
         if uuid:
             config['uuid'] = uuid
+        if bootable != None:
+            config['bootable'] = int(bootable)
 
         proto = self.readFrontend(devid, 'protocol')
         if proto:
diff -r 9837303a4708 -r 07f26e047fbf tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/xcutils/xc_save.c   Wed Dec 24 12:52:34 2008 +0900
@@ -166,18 +166,12 @@ static int suspend(void)
 {
     unsigned long sx_state = 0;
 
-    /* Nothing to do if the guest is in an ACPI sleep state. */
+    /* Cannot notify guest to shut itself down if it's in ACPI sleep state. */
     if (si.flags & XCFLAGS_HVM)
         xc_get_hvm_param(si.xc_fd, si.domid,
                          HVM_PARAM_ACPI_S_STATE, &sx_state);
-    if (sx_state != 0) {
-        /* notify xend that it can do device migration */
-        printf("suspended\n");
-        fflush(stdout);
-        return 1;
-    }
-
-    if (si.suspend_evtchn >= 0)
+
+    if ((sx_state == 0) && (si.suspend_evtchn >= 0))
         return evtchn_suspend();
 
     return compat_suspend();
diff -r 9837303a4708 -r 07f26e047fbf tools/xenpmd/xenpmd.c
--- a/tools/xenpmd/xenpmd.c     Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/xenpmd/xenpmd.c     Wed Dec 24 12:52:34 2008 +0900
@@ -297,7 +297,6 @@ int get_next_battery_info_or_status(DIR 
     if  ( !info_or_status )
         return 0;
 
-    memset(line_info, 0, 256);
     if (type == BIF) 
         memset(info_or_status, 0, sizeof(struct battery_info));
     else 
@@ -307,11 +306,8 @@ int get_next_battery_info_or_status(DIR 
     if ( !file )
         return 0;
 
-    while ( fgets(line_info, 1024, file) != NULL ) 
-    {
+    while ( fgets(line_info, sizeof(line_info), file) != NULL ) 
         parse_battery_info_or_status(line_info, type, info_or_status);
-        memset(line_info, 0, 256);
-    }
 
     fclose(file);
     return 1;
diff -r 9837303a4708 -r 07f26e047fbf tools/xenstat/xentop/xentop.c
--- a/tools/xenstat/xentop/xentop.c     Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/xenstat/xentop/xentop.c     Wed Dec 24 12:52:34 2008 +0900
@@ -254,7 +254,7 @@ static void fail(const char *str)
 {
        if(cwin != NULL && !isendwin())
                endwin();
-       fprintf(stderr, str);
+       fprintf(stderr, "%s", str);
        exit(1);
 }
 
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/Makefile
--- a/unmodified_drivers/linux-2.6/Makefile     Wed Dec 24 12:50:57 2008 +0900
+++ b/unmodified_drivers/linux-2.6/Makefile     Wed Dec 24 12:52:34 2008 +0900
@@ -4,3 +4,4 @@ obj-m += balloon/
 obj-m += balloon/
 obj-m += blkfront/
 obj-m += netfront/
+obj-m += scsifront/
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
--- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Wed Dec 24 12:50:57 2008 +0900
+++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Wed Dec 24 12:52:34 2008 +0900
@@ -147,7 +147,9 @@ extern char *kasprintf(gfp_t gfp, const 
  *   RHEL_VERSION
  */
 #if !defined(RHEL_VERSION) || (RHEL_VERSION == 4 && RHEL_UPDATE < 5)
+#if !defined(RHEL_MAJOR) || (RHEL_MAJOR == 4 && RHEL_MINOR < 5)
 typedef irqreturn_t (*irq_handler_t)(int, void *, struct pt_regs *);
+#endif
 #endif
 #endif
 
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/overrides.mk
--- a/unmodified_drivers/linux-2.6/overrides.mk Wed Dec 24 12:50:57 2008 +0900
+++ b/unmodified_drivers/linux-2.6/overrides.mk Wed Dec 24 12:52:34 2008 +0900
@@ -15,3 +15,4 @@ _XEN_CPPFLAGS += -include $(objtree)/inc
 
 EXTRA_CFLAGS += $(_XEN_CPPFLAGS)
 EXTRA_AFLAGS += $(_XEN_CPPFLAGS)
+CPPFLAGS := -I$(M)/include $(CPPFLAGS)
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/scsifront/Kbuild
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/unmodified_drivers/linux-2.6/scsifront/Kbuild     Wed Dec 24 12:52:34 2008 +0900
@@ -0,0 +1,6 @@
+include $(M)/overrides.mk
+
+obj-m += xen-scsi.o
+
+xen-scsi-objs := scsifront.o xenbus.o
+
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/scsifront/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/unmodified_drivers/linux-2.6/scsifront/Makefile   Wed Dec 24 12:52:34 2008 +0900
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/ia64/xen/cpufreq/cpufreq.c
--- a/xen/arch/ia64/xen/cpufreq/cpufreq.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/ia64/xen/cpufreq/cpufreq.c       Wed Dec 24 12:52:34 2008 +0900
@@ -275,6 +275,7 @@ acpi_cpufreq_cpu_exit (struct cpufreq_po
 }
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
+       .name       = "acpi-cpufreq",
        .verify     = acpi_cpufreq_verify,
        .target     = acpi_cpufreq_target,
        .get        = acpi_cpufreq_get,
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/Makefile     Wed Dec 24 12:52:34 2008 +0900
@@ -37,7 +37,6 @@ obj-y += numa.o
 obj-y += numa.o
 obj-y += pci.o
 obj-y += physdev.o
-obj-y += rwlock.o
 obj-y += setup.o
 obj-y += shutdown.o
 obj-y += smp.o
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c      Wed Dec 24 12:52:34 2008 +0900
@@ -71,7 +71,8 @@ static struct acpi_processor_power *__re
 
 static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
 {
-    uint32_t i;
+    uint32_t i, idle_usage = 0;
+    uint64_t res, idle_res = 0;
 
     printk("==cpu%d==\n", cpu);
     printk("active state:\t\tC%d\n",
@@ -81,14 +82,21 @@ static void print_acpi_power(uint32_t cp
     
     for ( i = 1; i < power->count; i++ )
     {
+        res = acpi_pm_tick_to_ns(power->states[i].time);
+        idle_usage += power->states[i].usage;
+        idle_res += res;
+
         printk((power->last_state && power->last_state->idx == i) ?
                "   *" : "    ");
         printk("C%d:\t", i);
         printk("type[C%d] ", power->states[i].type);
         printk("latency[%03d] ", power->states[i].latency);
         printk("usage[%08d] ", power->states[i].usage);
-        printk("duration[%"PRId64"]\n", power->states[i].time);
-    }
+        printk("duration[%"PRId64"]\n", res);
+    }
+    printk("    C0:\tusage[%08d] duration[%"PRId64"]\n",
+           idle_usage, NOW() - idle_res);
+
 }
 
 static void dump_cx(unsigned char key)
@@ -317,8 +325,6 @@ static void acpi_processor_idle(void)
          * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
          * deep C state can't work correctly.
          */
-        /* preparing TSC stop */
-        cstate_save_tsc();
         /* preparing APIC stop */
         lapic_timer_off();
 
@@ -751,8 +757,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
 int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
 {
     const struct acpi_processor_power *power = processor_powers[cpuid];
-    struct vcpu *v = idle_vcpu[cpuid];
-    uint64_t usage;
+    uint64_t usage, res, idle_usage = 0, idle_res = 0;
     int i;
 
     if ( power == NULL )
@@ -765,20 +770,26 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
 
     stat->last = power->last_state ? power->last_state->idx : 0;
     stat->nr = power->count;
-    stat->idle_time = v->runstate.time[RUNSTATE_running];
-    if ( v->is_running )
-        stat->idle_time += NOW() - v->runstate.state_entry_time;
-
-    for ( i = 0; i < power->count; i++ )
-    {
-        usage = power->states[i].usage;
-        if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) )
+    stat->idle_time = get_cpu_idle_time(cpuid);
+
+    for ( i = power->count - 1; i >= 0; i-- )
+    {
+        if ( i != 0 )
+        {
+            usage = power->states[i].usage;
+            res = acpi_pm_tick_to_ns(power->states[i].time);
+            idle_usage += usage;
+            idle_res += res;
+        }
+        else
+        {
+            usage = idle_usage;
+            res = NOW() - idle_res;
+        }
+        if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) ||
+             copy_to_guest_offset(stat->residencies, i, &res, 1) )
             return -EFAULT;
     }
-    for ( i = 0; i < power->count; i++ )
-        if ( copy_to_guest_offset(stat->residencies, i, 
-                                  &power->states[i].time, 1) )
-            return -EFAULT;
 
     return 0;
 }
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Wed Dec 24 12:52:34 2008 +0900
@@ -131,9 +131,12 @@ struct drv_cmd {
     u32 val;
 };
 
-static void do_drv_read(struct drv_cmd *cmd)
-{
+static void do_drv_read(void *drvcmd)
+{
+    struct drv_cmd *cmd;
     u32 h;
+
+    cmd = (struct drv_cmd *)drvcmd;
 
     switch (cmd->type) {
     case SYSTEM_INTEL_MSR_CAPABLE:
@@ -174,7 +177,13 @@ static void drv_read(struct drv_cmd *cmd
 {
     cmd->val = 0;
 
-    do_drv_read(cmd);
+    ASSERT(cpus_weight(cmd->mask) == 1);
+
+    /* to reduce IPI for the sake of performance */
+    if (cpu_isset(smp_processor_id(), cmd->mask))
+        do_drv_read((void *)cmd);
+    else
+        on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1);
 }
 
 static void drv_write(struct drv_cmd *cmd)
@@ -184,13 +193,21 @@ static void drv_write(struct drv_cmd *cm
 
 static u32 get_cur_val(cpumask_t mask)
 {
+    struct cpufreq_policy *policy;
     struct processor_performance *perf;
     struct drv_cmd cmd;
+    unsigned int cpu;
 
     if (unlikely(cpus_empty(mask)))
         return 0;
 
-    switch (drv_data[first_cpu(mask)]->cpu_feature) {
+    cpu = first_cpu(mask);
+    policy = cpufreq_cpu_policy[cpu];
+
+    if (!policy)
+        return 0;    
+
+    switch (drv_data[policy->cpu]->cpu_feature) {
     case SYSTEM_INTEL_MSR_CAPABLE:
         cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
         cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
@@ -205,7 +222,7 @@ static u32 get_cur_val(cpumask_t mask)
         return 0;
     }
 
-    cmd.mask = mask;
+    cmd.mask = cpumask_of_cpu(cpu);
 
     drv_read(&cmd);
     return cmd.val;
@@ -255,28 +272,43 @@ static void  __get_measured_perf(void *p
 
 static unsigned int get_measured_perf(unsigned int cpu)
 {
-    unsigned int retval, perf_percent;
+    struct cpufreq_policy *policy;
+    unsigned int perf_percent;
     cpumask_t cpumask;
 
     if (!cpu_online(cpu))
         return 0;
 
-    cpumask = cpumask_of_cpu(cpu);
-    on_selected_cpus(cpumask, __get_measured_perf, (void *)&perf_percent,0,1);
-
-    retval = drv_data[cpu]->max_freq * perf_percent / 100;
-    return retval;
+    policy = cpufreq_cpu_policy[cpu];
+    if (!policy)
+        return 0;
+
+    /* Usually we take the short path (no IPI) for the sake of performance. */
+    if (cpu == smp_processor_id()) {
+        __get_measured_perf((void *)&perf_percent);
+    } else {
+        cpumask = cpumask_of_cpu(cpu);
+        on_selected_cpus(cpumask, __get_measured_perf, 
+                        (void *)&perf_percent,0,1);
+    }
+
+    return drv_data[cpu]->max_freq * perf_percent / 100;
 }
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
-    struct acpi_cpufreq_data *data = drv_data[cpu];
+    struct cpufreq_policy *policy;
+    struct acpi_cpufreq_data *data;
     unsigned int freq;
 
+    policy = cpufreq_cpu_policy[cpu];
+    if (!policy)
+        return 0;
+
+    data = drv_data[policy->cpu];
     if (unlikely(data == NULL ||
-        data->acpi_data == NULL || data->freq_table == NULL)) {
-        return 0;
-    }
+        data->acpi_data == NULL || data->freq_table == NULL))
+        return 0;
 
     freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data);
     return freq;
@@ -327,16 +359,10 @@ static int acpi_cpufreq_target(struct cp
 
     next_perf_state = data->freq_table[next_state].index;
     if (perf->state == next_perf_state) {
-        if (unlikely(policy->resume)) {
-            printk(KERN_INFO "Called after resume, resetting to P%d\n", 
-                next_perf_state);
+        if (unlikely(policy->resume))
             policy->resume = 0;
-        }
-        else {
-            printk(KERN_DEBUG "Already at target state (P%d)\n", 
-                next_perf_state);
+        else
             return 0;
-        }
     }
 
     switch (data->cpu_feature) {
@@ -555,6 +581,7 @@ static int acpi_cpufreq_cpu_exit(struct 
 }
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
+    .name   = "acpi-cpufreq",
     .verify = acpi_cpufreq_verify,
     .target = acpi_cpufreq_target,
     .init   = acpi_cpufreq_cpu_init,
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/acpi/cpufreq/powernow.c
--- a/xen/arch/x86/acpi/cpufreq/powernow.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c      Wed Dec 24 12:52:34 2008 +0900
@@ -129,6 +129,16 @@ static int powernow_cpufreq_target(struc
     return result;
 }
 
+static int powernow_cpufreq_verify(struct cpufreq_policy *policy)
+{
+    struct powernow_cpufreq_data *data;
+
+    if (!policy || !(data = drv_data[policy->cpu]))
+        return -EINVAL;
+
+    return cpufreq_frequency_table_verify(policy, data->freq_table);
+}
+
 static int powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
     unsigned int i;
@@ -243,6 +253,7 @@ static int powernow_cpufreq_cpu_exit(str
 }
 
 static struct cpufreq_driver powernow_cpufreq_driver = {
+    .verify = powernow_cpufreq_verify,
     .target = powernow_cpufreq_target,
     .init   = powernow_cpufreq_cpu_init,
     .exit   = powernow_cpufreq_cpu_exit
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/apic.c       Wed Dec 24 12:52:34 2008 +0900
@@ -99,8 +99,11 @@ void __init apic_intr_init(void)
     /* Performance Counters Interrupt */
     set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
 
-    /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+    /* CMCI Correctable Machine Check Interrupt */
+    set_intr_gate(CMCI_APIC_VECTOR, cmci_interrupt);
+
+    /* thermal monitor LVT interrupt, for P4 and the latest Intel CPUs */
+#ifdef CONFIG_X86_MCE_THERMAL
     set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
 }
@@ -172,12 +175,17 @@ void clear_local_APIC(void)
     }
 
 /* lets not touch this if we didn't frob it */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
     if (maxlvt >= 5) {
         v = apic_read(APIC_LVTTHMR);
         apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
     }
 #endif
+
+    if (maxlvt >= 6) {
+        v = apic_read(APIC_CMCI);
+        apic_write_around(APIC_CMCI, v | APIC_LVT_MASKED);
+    }
     /*
      * Clean APIC state for other OSs:
      */
@@ -189,10 +197,13 @@ void clear_local_APIC(void)
     if (maxlvt >= 4)
         apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
 
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
     if (maxlvt >= 5)
         apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
 #endif
+    if (maxlvt >= 6)
+        apic_write_around(APIC_CMCI, APIC_LVT_MASKED);
+
     v = GET_APIC_VERSION(apic_read(APIC_LVR));
     if (APIC_INTEGRATED(v)) {  /* !82489DX */
         if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP. */
@@ -597,6 +608,7 @@ static struct {
     unsigned int apic_spiv;
     unsigned int apic_lvtt;
     unsigned int apic_lvtpc;
+    unsigned int apic_lvtcmci;
     unsigned int apic_lvt0;
     unsigned int apic_lvt1;
     unsigned int apic_lvterr;
@@ -608,7 +620,7 @@ int lapic_suspend(void)
 int lapic_suspend(void)
 {
     unsigned long flags;
-
+    int maxlvt = get_maxlvt();
     if (!apic_pm_state.active)
         return 0;
 
@@ -620,6 +632,11 @@ int lapic_suspend(void)
     apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
     apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
     apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+
+    if (maxlvt >= 6) {
+        apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
+    }
+
     apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
     apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
     apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
@@ -637,6 +654,7 @@ int lapic_resume(void)
 {
     unsigned int l, h;
     unsigned long flags;
+    int maxlvt = get_maxlvt();
 
     if (!apic_pm_state.active)
         return 0;
@@ -669,6 +687,11 @@ int lapic_resume(void)
     apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
     apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
     apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+
+    if (maxlvt >= 6) {
+        apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
+    }
+
     apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
     apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
     apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/amd.c    Wed Dec 24 12:52:34 2008 +0900
@@ -461,8 +461,10 @@ static void __devinit init_amd(struct cp
 
        if (cpuid_eax(0x80000000) >= 0x80000007) {
                c->x86_power = cpuid_edx(0x80000007);
-               if (c->x86_power & (1<<8))
+               if (c->x86_power & (1<<8)) {
                        set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+                       set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+               }
        }
 
 #ifdef CONFIG_X86_HT
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/intel.c  Wed Dec 24 12:52:34 2008 +0900
@@ -218,6 +218,10 @@ static void __devinit init_intel(struct 
        if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
                (c->x86 == 0x6 && c->x86_model >= 0x0e))
                set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+       if (cpuid_edx(0x80000007) & (1u<<8)) {
+               set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+               set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+       }
 
        start_vmx();
 }
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/Makefile
--- a/xen/arch/x86/cpu/mcheck/Makefile  Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/Makefile  Wed Dec 24 12:52:34 2008 +0900
@@ -3,8 +3,7 @@ obj-y += amd_k8.o
 obj-y += amd_k8.o
 obj-y += amd_f10.o
 obj-y += mce.o
+obj-y += mce_intel.o
 obj-y += non-fatal.o
-obj-y += p4.o
 obj-$(x86_32) += p5.o
-obj-$(x86_32) += p6.o
 obj-$(x86_32) += winchip.o
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/amd_k8.c
--- a/xen/arch/x86/cpu/mcheck/amd_k8.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/amd_k8.c  Wed Dec 24 12:52:34 2008 +0900
@@ -219,7 +219,7 @@ void k8_machine_check(struct cpu_user_re
                        show_execution_state(regs);
                }
                x86_mcinfo_dump(mc_data);
-               panic("End of MCE. Use mcelog to decode above error codes.\n");
+               mc_panic("End of MCE. Use mcelog to decode above error codes.\n");
        }
 
        /* If Dom0 registered a machine check handler, which is only possible
@@ -248,7 +248,7 @@ void k8_machine_check(struct cpu_user_re
                        /* Dom0 is impacted. Since noone can't handle
                         * this error, panic! */
                        x86_mcinfo_dump(mc_data);
-                       panic("MCE occured in Dom0, which it can't handle\n");
+                       mc_panic("MCE occured in Dom0, which it can't handle\n");
 
                        /* UNREACHED */
                } else {
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/k7.c
--- a/xen/arch/x86/cpu/mcheck/k7.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/k7.c      Wed Dec 24 12:52:34 2008 +0900
@@ -14,6 +14,7 @@
 #include <asm/msr.h>
 
 #include "mce.h"
+#include "x86_mca.h"
 
 /* Machine Check Handler For AMD Athlon/Duron */
 static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code)
@@ -57,9 +58,9 @@ static fastcall void k7_machine_check(st
        }
 
        if (recover&2)
-               panic ("CPU context corrupt");
+               mc_panic ("CPU context corrupt");
        if (recover&1)
-               panic ("Unable to continue");
+               mc_panic ("Unable to continue");
        printk (KERN_EMERG "Attempting to continue.\n");
        mcgstl &= ~(1<<2);
        wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Wed Dec 24 12:52:34 2008 +0900
@@ -9,6 +9,7 @@
 #include <xen/config.h>
 #include <xen/smp.h>
 #include <xen/errno.h>
+#include <xen/console.h>
 
 #include <asm/processor.h> 
 #include <asm/system.h>
@@ -26,7 +27,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks);      /* non-
  * to physical cpus present in the machine.
  * The more physical cpus are available, the more entries you need.
  */
-#define MAX_MCINFO     10
+#define MAX_MCINFO     20
 
 struct mc_machine_notify {
        struct mc_info mc;
@@ -109,6 +110,12 @@ static void amd_mcheck_init(struct cpuin
        }
 }
 
+/*check the existence of Machine Check*/
+int mce_available(struct cpuinfo_x86 *c)
+{
+       return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+}
+
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
 {
@@ -134,11 +141,13 @@ void mcheck_init(struct cpuinfo_x86 *c)
 #ifndef CONFIG_X86_64
                if (c->x86==5)
                        intel_p5_mcheck_init(c);
-               if (c->x86==6)
-                       intel_p6_mcheck_init(c);
 #endif
-               if (c->x86==15)
-                       intel_p4_mcheck_init(c);
+               /*If it is P6 or P4 family, including CORE 2 DUO series*/
+               if (c->x86 == 6 || c->x86==15)
+               {
+                       printk(KERN_DEBUG "MCE: Intel newly family MC Init\n");
+                       intel_mcheck_init(c);
+               }
                break;
 
 #ifndef CONFIG_X86_64
@@ -412,7 +421,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
                if (mic == NULL)
                        return;
                if (mic->type != MC_TYPE_BANK)
-                       continue;
+                       goto next;
 
                mc_bank = (struct mcinfo_bank *)mic;
        
@@ -425,6 +434,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
                        printk(" at %16"PRIx64, mc_bank->mc_addr);
 
                printk("\n");
+next:
                mic = x86_mcinfo_next(mic); /* next entry */
                if ((mic == NULL) || (mic->size == 0))
                        break;
@@ -574,3 +584,15 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
 
        return ret;
 }
+
+void mc_panic(char *s)
+{
+    console_start_sync();
+    printk("Fatal machine check: %s\n", s);
+    printk("\n"
+           "****************************************\n"
+           "\n"
+           "   The processor has reported a hardware error which cannot\n"
+           "   be recovered from.  Xen will now reboot the machine.\n");
+    panic("HARDWARE ERROR");
+}
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Wed Dec 24 12:52:34 2008 +0900
@@ -1,14 +1,22 @@
 #include <xen/init.h>
+#include <asm/types.h>
 #include <asm/traps.h>
+#include <asm/atomic.h>
+#include <asm/percpu.h>
+
 
 /* Init functions */
 void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
 void amd_k7_mcheck_init(struct cpuinfo_x86 *c);
 void amd_k8_mcheck_init(struct cpuinfo_x86 *c);
 void amd_f10_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+
+
+void intel_mcheck_timer(struct cpuinfo_x86 *c);
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+void intel_mcheck_init(struct cpuinfo_x86 *c);
+void mce_intel_feature_init(struct cpuinfo_x86 *c);
+
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
 
 /* Function pointer used in the handlers to collect additional information
@@ -19,12 +27,11 @@ extern int (*mc_callback_bank_extended)(
                uint16_t bank, uint64_t status);
 
 
+int mce_available(struct cpuinfo_x86 *c);
 /* Helper functions used for collecting error telemetry */
 struct mc_info *x86_mcinfo_getptr(void);
 void x86_mcinfo_clear(struct mc_info *mi);
 int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
 void x86_mcinfo_dump(struct mc_info *mi);
+void mc_panic(char *s);
 
-/* Global variables */
-extern int mce_disabled;
-extern unsigned int nr_mce_banks;
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/mce_intel.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Dec 24 12:52:34 2008 +0900
@@ -0,0 +1,632 @@
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+#include "x86_mca.h"
+
+DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
+
+static int nr_intel_ext_msrs = 0;
+static int cmci_support = 0;
+extern int firstbank;
+
+#ifdef CONFIG_X86_MCE_THERMAL
+static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
+{
+    printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n",
+                smp_processor_id());
+    add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* P4/Xeon Thermal transition interrupt handler */
+static void intel_thermal_interrupt(struct cpu_user_regs *regs)
+{
+    u32 l, h;
+    unsigned int cpu = smp_processor_id();
+    static s_time_t next[NR_CPUS];
+
+    ack_APIC_irq();
+    if (NOW() < next[cpu])
+        return;
+
+    next[cpu] = NOW() + MILLISECS(5000);
+    rdmsr(MSR_IA32_THERM_STATUS, l, h);
+    if (l & 0x1) {
+        printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
+        printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
+                cpu);
+        add_taint(TAINT_MACHINE_CHECK);
+    } else {
+        printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+    }
+}
+
+/* Thermal interrupt handler for this CPU setup */
+static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) 
+        = unexpected_thermal_interrupt;
+
+fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
+{
+    irq_enter();
+    vendor_thermal_interrupt(regs);
+    irq_exit();
+}
+
+/* P4/Xeon Thermal regulation detect and init */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+    u32 l, h;
+    int tm2 = 0;
+    unsigned int cpu = smp_processor_id();
+
+    /* Thermal monitoring */
+    if (!cpu_has(c, X86_FEATURE_ACPI))
+        return; /* -ENODEV */
+
+    /* Clock modulation */
+    if (!cpu_has(c, X86_FEATURE_ACC))
+        return; /* -ENODEV */
+
+    /* first check if its enabled already, in which case there might
+     * be some SMM goo which handles it, so we can't even put a handler
+     * since it might be delivered via SMI already -zwanem.
+     */
+    rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+    h = apic_read(APIC_LVTTHMR);
+    if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+        printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu);
+        return; /* -EBUSY */
+    }
+
+    if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+        tm2 = 1;
+
+    /* check whether a vector already exists, temporarily masked? */
+    if (h & APIC_VECTOR_MASK) {
+        printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n",
+                 cpu, (h & APIC_VECTOR_MASK));
+        return; /* -EBUSY */
+    }
+
+    /* The temperature transition interrupt handler setup */
+    h = THERMAL_APIC_VECTOR;    /* our delivery vector */
+    h |= (APIC_DM_FIXED | APIC_LVT_MASKED);  /* we'll mask till we're ready */
+    apic_write_around(APIC_LVTTHMR, h);
+
+    rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
+    wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+    /* ok we're good to go... */
+    vendor_thermal_interrupt = intel_thermal_interrupt;
+
+    rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+    wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+
+    l = apic_read (APIC_LVTTHMR);
+    apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+    printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 
+            cpu, tm2 ? "TM2" : "TM1");
+    return;
+}
+#endif /* CONFIG_X86_MCE_THERMAL */
+
+static inline void intel_get_extended_msrs(struct mcinfo_extended *mc_ext)
+{
+    if (nr_intel_ext_msrs == 0)
+        return;
+
+    /*this function will called when CAP(9).MCG_EXT_P = 1*/
+    memset(mc_ext, 0, sizeof(struct mcinfo_extended));
+    mc_ext->common.type = MC_TYPE_EXTENDED;
+    mc_ext->common.size = sizeof(mc_ext);
+    mc_ext->mc_msrs = 10;
+
+    mc_ext->mc_msr[0].reg = MSR_IA32_MCG_EAX;
+    rdmsrl(MSR_IA32_MCG_EAX, mc_ext->mc_msr[0].value);
+    mc_ext->mc_msr[1].reg = MSR_IA32_MCG_EBX;
+    rdmsrl(MSR_IA32_MCG_EBX, mc_ext->mc_msr[1].value);
+    mc_ext->mc_msr[2].reg = MSR_IA32_MCG_ECX;
+    rdmsrl(MSR_IA32_MCG_ECX, mc_ext->mc_msr[2].value);
+
+    mc_ext->mc_msr[3].reg = MSR_IA32_MCG_EDX;
+    rdmsrl(MSR_IA32_MCG_EDX, mc_ext->mc_msr[3].value);
+    mc_ext->mc_msr[4].reg = MSR_IA32_MCG_ESI;
+    rdmsrl(MSR_IA32_MCG_ESI, mc_ext->mc_msr[4].value);
+    mc_ext->mc_msr[5].reg = MSR_IA32_MCG_EDI;
+    rdmsrl(MSR_IA32_MCG_EDI, mc_ext->mc_msr[5].value);
+
+    mc_ext->mc_msr[6].reg = MSR_IA32_MCG_EBP;
+    rdmsrl(MSR_IA32_MCG_EBP, mc_ext->mc_msr[6].value);
+    mc_ext->mc_msr[7].reg = MSR_IA32_MCG_ESP;
+    rdmsrl(MSR_IA32_MCG_ESP, mc_ext->mc_msr[7].value);
+    mc_ext->mc_msr[8].reg = MSR_IA32_MCG_EFLAGS;
+    rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext->mc_msr[8].value);
+    mc_ext->mc_msr[9].reg = MSR_IA32_MCG_EIP;
+    rdmsrl(MSR_IA32_MCG_EIP, mc_ext->mc_msr[9].value);
+}
+
+/* machine_check_poll might be called by following types:
+ * 1. called when do mcheck_init.
+ * 2. called in cmci interrupt handler
+ * 3. called in polling handler
+ * It will generate a new mc_info item if found CE/UC errors. DOM0 is the 
+ * consumer.
+*/
+static int machine_check_poll(struct mc_info *mi, int calltype)
+{
+    int exceptions = (read_cr4() & X86_CR4_MCE);
+    int i, nr_unit = 0, uc = 0, pcc = 0;
+    uint64_t status, addr;
+    struct mcinfo_global mcg;
+    struct mcinfo_extended mce;
+    unsigned int cpu;
+    struct domain *d;
+
+    cpu = smp_processor_id();
+
+    if (!mi) {
+        printk(KERN_ERR "mcheck_poll: Failed to get mc_info entry\n");
+        return 0;
+    }
+    x86_mcinfo_clear(mi);
+
+    memset(&mcg, 0, sizeof(mcg));
+    mcg.common.type = MC_TYPE_GLOBAL;
+    mcg.common.size = sizeof(mcg);
+    /*If called from cpu-reset check, don't need to fill them.
+     *If called from cmci context, we'll try to fill domid by memory addr
+    */
+    mcg.mc_domid = -1;
+    mcg.mc_vcpuid = -1;
+    if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET)
+        mcg.mc_flags = MC_FLAG_POLLED;
+    else if (calltype == MC_FLAG_CMCI)
+        mcg.mc_flags = MC_FLAG_CMCI;
+    mcg.mc_socketid = phys_proc_id[cpu];
+    mcg.mc_coreid = cpu_core_id[cpu];
+    mcg.mc_apicid = cpu_physical_id(cpu);
+    mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); 
+    rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
+
+    for ( i = 0; i < nr_mce_banks; i++ ) {
+        struct mcinfo_bank mcb;
+        /*For CMCI, only owners checks the owned MSRs*/
+        if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
+             (calltype & MC_FLAG_CMCI) )
+            continue;
+        rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
+
+        if (! (status & MCi_STATUS_VAL) )
+            continue;
+        /*
+         * Uncorrected events are handled by the exception
+         * handler when it is enabled. But when the exception
+         * is disabled such as when mcheck_init, log everything.
+         */
+        if ((status & MCi_STATUS_UC) && exceptions)
+            continue;
+
+        if (status & MCi_STATUS_UC)
+            uc = 1;
+        if (status & MCi_STATUS_PCC)
+            pcc = 1;
+
+        memset(&mcb, 0, sizeof(mcb));
+        mcb.common.type = MC_TYPE_BANK;
+        mcb.common.size = sizeof(mcb);
+        mcb.mc_bank = i;
+        mcb.mc_status = status;
+        if (status & MCi_STATUS_MISCV)
+            rdmsrl(MSR_IA32_MC0_MISC + 4 * i, mcb.mc_misc);
+        if (status & MCi_STATUS_ADDRV) {
+            rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr);
+            d = maddr_get_owner(addr);
+            if ( d && (calltype == MC_FLAG_CMCI || calltype == MC_FLAG_POLLED) )
+                mcb.mc_domid = d->domain_id;
+        }
+        if (cmci_support)
+            rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2);
+        if (calltype == MC_FLAG_CMCI)
+            rdtscll(mcb.mc_tsc);
+        x86_mcinfo_add(mi, &mcb);
+        nr_unit++;
+        add_taint(TAINT_MACHINE_CHECK);
+        /*Clear state for this bank */
+        wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0);
+        printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%"PRIx64"]\n", 
+                i, cpu, status);
+        printk(KERN_DEBUG "mcheck_poll: CPU%d, SOCKET%d, CORE%d, APICID[%d], "
+                "thread[%d]\n", cpu, mcg.mc_socketid, 
+                mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid);
+ 
+    }
+    /*if pcc = 1, uc must be 1*/
+    if (pcc)
+        mcg.mc_flags |= MC_FLAG_UNCORRECTABLE;
+    else if (uc)
+        mcg.mc_flags |= MC_FLAG_RECOVERABLE;
+    else /*correctable*/
+        mcg.mc_flags |= MC_FLAG_CORRECTABLE;
+
+    if (nr_unit && nr_intel_ext_msrs && 
+                    (mcg.mc_gstatus & MCG_STATUS_EIPV)) {
+        intel_get_extended_msrs(&mce);
+        x86_mcinfo_add(mi, &mce);
+    }
+    if (nr_unit) 
+        x86_mcinfo_add(mi, &mcg);
+    /*Clear global state*/
+    return nr_unit;
+}
+
+static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
+{
+    /* MACHINE CHECK Error handler will be sent in another patch,
+     * simply copy old solutions here. This code will be replaced
+     * by upcoming machine check patches
+     */
+
+    int recover=1;
+    u32 alow, ahigh, high, low;
+    u32 mcgstl, mcgsth;
+    int i;
+   
+    rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+    if (mcgstl & (1<<0))       /* Recoverable ? */
+        recover=0;
+    
+    printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+           smp_processor_id(), mcgsth, mcgstl);
+    
+    for (i=0; i<nr_mce_banks; i++) {
+        rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+        if (high & (1<<31)) {
+            if (high & (1<<29))
+                recover |= 1;
+            if (high & (1<<25))
+                recover |= 2;
+            printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+            high &= ~(1<<31);
+            if (high & (1<<27)) {
+                rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+                printk ("[%08x%08x]", ahigh, alow);
+            }
+            if (high & (1<<26)) {
+                rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+                printk (" at %08x%08x", ahigh, alow);
+            }
+            printk ("\n");
+        }
+    }
+    
+    if (recover & 2)
+        mc_panic ("CPU context corrupt");
+    if (recover & 1)
+        mc_panic ("Unable to continue");
+    
+    printk(KERN_EMERG "Attempting to continue.\n");
+    /* 
+     * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+     * recoverable/continuable.This will allow BIOS to look at the MSRs
+     * for errors if the OS could not log the error.
+     */
+    for (i=0; i<nr_mce_banks; i++) {
+        u32 msr;
+        msr = MSR_IA32_MC0_STATUS+i*4;
+        rdmsr (msr, low, high);
+        if (high&(1<<31)) {
+            /* Clear it */
+            wrmsr(msr, 0UL, 0UL);
+            /* Serialize */
+            wmb();
+            add_taint(TAINT_MACHINE_CHECK);
+        }
+    }
+    mcgstl &= ~(1<<2);
+    wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+static DEFINE_SPINLOCK(cmci_discover_lock);
+static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
+
+/*
+ * Discover bank sharing using the algorithm recommended in the SDM.
+ */
+static int do_cmci_discover(int i)
+{
+    unsigned msr = MSR_IA32_MC0_CTL2 + i;
+    u64 val;
+
+    rdmsrl(msr, val);
+    /* Some other CPU already owns this bank. */
+    if (val & CMCI_EN) {
+        clear_bit(i, __get_cpu_var(mce_banks_owned));
+        goto out;
+    }
+    wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
+    rdmsrl(msr, val);
+
+    if (!(val & CMCI_EN)) {
+        /* This bank does not support CMCI. Polling timer has to handle it. */
+        set_bit(i, __get_cpu_var(no_cmci_banks));
+        return 0;
+    }
+    set_bit(i, __get_cpu_var(mce_banks_owned));
+out:
+    clear_bit(i, __get_cpu_var(no_cmci_banks));
+    return 1;
+}
+
+static void cmci_discover(void)
+{
+    unsigned long flags;
+    int i;
+
+    printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id());
+
+    spin_lock_irqsave(&cmci_discover_lock, flags);
+
+    for (i = 0; i < nr_mce_banks; i++)
+        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+            do_cmci_discover(i);
+
+    spin_unlock_irqrestore(&cmci_discover_lock, flags);
+
+    printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n", 
+           smp_processor_id(), 
+           *((unsigned long *)__get_cpu_var(mce_banks_owned)), 
+           *((unsigned long *)__get_cpu_var(no_cmci_banks)));
+}
+
+/*
+ * Define an owner for each bank. Banks can be shared between CPUs
+ * and to avoid reporting events multiple times always set up one
+ * CPU as owner. 
+ *
+ * The assignment has to be redone when CPUs go offline and
+ * any of the owners goes away. Also pollers run in parallel so we
+ * have to be careful to update the banks in a way that doesn't
+ * lose or duplicate events.
+ */
+
+static void mce_set_owner(void)
+{
+    if (!cmci_support || mce_disabled == 1)
+        return;
+
+    cmci_discover();
+}
+
+static void __cpu_mcheck_distribute_cmci(void *unused)
+{
+    cmci_discover();
+}
+
+void cpu_mcheck_distribute_cmci(void)
+{
+    if (cmci_support && !mce_disabled)
+        on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0, 0);
+}
+
+static void clear_cmci(void)
+{
+    int i;
+
+    if (!cmci_support || mce_disabled == 1)
+        return;
+
+    printk(KERN_DEBUG "CMCI: clear_cmci support on CPU%d\n", 
+            smp_processor_id());
+
+    for (i = 0; i < nr_mce_banks; i++) {
+        unsigned msr = MSR_IA32_MC0_CTL2 + i;
+        u64 val;
+        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+            continue;
+        rdmsrl(msr, val);
+        if (val & (CMCI_EN|CMCI_THRESHOLD_MASK))
+            wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
+        clear_bit(i, __get_cpu_var(mce_banks_owned));
+    }
+}
+
+void cpu_mcheck_disable(void)
+{
+    clear_in_cr4(X86_CR4_MCE);
+
+    if (cmci_support && !mce_disabled)
+        clear_cmci();
+}
+
+static void intel_init_cmci(struct cpuinfo_x86 *c)
+{
+    u32 l, apic;
+    int cpu = smp_processor_id();
+
+    if (!mce_available(c) || !cmci_support) {
+        printk(KERN_DEBUG "CMCI: CPU%d has no CMCI support\n", cpu);
+        return;
+    }
+
+    apic = apic_read(APIC_CMCI);
+    if ( apic & APIC_VECTOR_MASK )
+    {
+        printk(KERN_WARNING "CPU%d CMCI LVT vector (%#x) already installed\n",
+            cpu, ( apic & APIC_VECTOR_MASK ));
+        return;
+    }
+
+    apic = CMCI_APIC_VECTOR;
+    apic |= (APIC_DM_FIXED | APIC_LVT_MASKED);
+    apic_write_around(APIC_CMCI, apic);
+
+    l = apic_read(APIC_CMCI);
+    apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED);
+}
+
+fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
+{
+    int nr_unit;
+    struct mc_info *mi =  x86_mcinfo_getptr();
+    int cpu = smp_processor_id();
+
+    ack_APIC_irq();
+    irq_enter();
+    printk(KERN_DEBUG "CMCI: cmci_intr happen on CPU%d\n", cpu);
+    nr_unit = machine_check_poll(mi, MC_FLAG_CMCI);
+    if (nr_unit) {
+        x86_mcinfo_dump(mi);
+        if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
+            send_guest_global_virq(dom0, VIRQ_MCA);
+    }
+    irq_exit();
+}
+
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+
+#ifdef CONFIG_X86_MCE_THERMAL
+    intel_init_thermal(c);
+#endif
+    intel_init_cmci(c);
+}
+
+static void mce_cap_init(struct cpuinfo_x86 *c)
+{
+    u32 l, h;
+
+    rdmsr (MSR_IA32_MCG_CAP, l, h);
+    if ((l & MCG_CMCI_P) && cpu_has_apic)
+        cmci_support = 1;
+
+    nr_mce_banks = l & 0xff;
+    if (nr_mce_banks > MAX_NR_BANKS)
+        printk(KERN_WARNING "MCE: exceed max mce banks\n");
+    if (l & MCG_EXT_P)
+    {
+        nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
+        printk (KERN_INFO "CPU%d: Intel Extended MCE MSRs (%d) available\n",
+            smp_processor_id(), nr_intel_ext_msrs);
+    }
+    /* for most of p6 family, bank 0 is an alias bios MSR.
+     * But after model>1a, bank 0 is available*/
+    if ( c->x86 == 6 && c->x86_vendor == X86_VENDOR_INTEL
+            && c->x86_model < 0x1A)
+        firstbank = 1;
+    else
+        firstbank = 0;
+}
+
+static void mce_init(void)
+{
+    u32 l, h;
+    int i, nr_unit;
+    struct mc_info *mi =  x86_mcinfo_getptr();
+    clear_in_cr4(X86_CR4_MCE);
+    /* log the machine checks left over from the previous reset.
+     * This also clears all registers*/
+
+    nr_unit = machine_check_poll(mi, MC_FLAG_RESET);
+    /*in the boot up stage, not expect inject to DOM0, but go print out
+    */
+    if (nr_unit > 0)
+        x86_mcinfo_dump(mi);
+
+    set_in_cr4(X86_CR4_MCE);
+    rdmsr (MSR_IA32_MCG_CAP, l, h);
+    if (l & MCG_CTL_P) /* Control register present ? */
+        wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+    for (i = firstbank; i < nr_mce_banks; i++)
+    {
+        /*Some banks are shared across cores, use MCi_CTRL to judge whether
+         * this bank has been initialized by other cores already.*/
+        rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
+        if (!l & !h)
+        {
+            /*if ctl is 0, this bank is never initialized*/
+            printk(KERN_DEBUG "mce_init: init bank%d\n", i);
+            wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
+            wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
+       }
+    }
+    if (firstbank) /*if cmci enabled, firstbank = 0*/
+        wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+}
+
+/*p4/p6 faimily has similar MCA initialization process*/
+void intel_mcheck_init(struct cpuinfo_x86 *c)
+{
+    mce_cap_init(c);
+    printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+            smp_processor_id());
+    /* machine check is available */
+    machine_check_vector = intel_machine_check;
+    mce_init();
+    mce_intel_feature_init(c);
+    mce_set_owner();
+}
+
+/*
+ * Periodic polling timer for "silent" machine check errors. If the
+ * poller finds an MCE, poll faster. When the poller finds no more 
+ * errors, poll slower
+*/
+static struct timer mce_timer;
+
+#define MCE_PERIOD 4000
+#define MCE_MIN    2000
+#define MCE_MAX    32000
+
+static u64 period = MCE_PERIOD;
+static int adjust = 0;
+
+static void mce_intel_checkregs(void *info)
+{
+    int nr_unit;
+    struct mc_info *mi =  x86_mcinfo_getptr();
+
+    if( !mce_available(&current_cpu_data))
+        return;
+    nr_unit = machine_check_poll(mi, MC_FLAG_POLLED);
+    if (nr_unit)
+    {
+        x86_mcinfo_dump(mi);
+        adjust++;
+        if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
+            send_guest_global_virq(dom0, VIRQ_MCA);
+    }
+}
+
+static void mce_intel_work_fn(void *data)
+{
+    on_each_cpu(mce_intel_checkregs, data, 1, 1);
+    if (adjust) {
+        period = period / (adjust + 1);
+        printk(KERN_DEBUG "mcheck_poll: Find error, shorten interval "
+               "to %"PRIu64"\n", period);
+    }
+    else {
+        period *= 2;
+    }
+    if (period > MCE_MAX) 
+        period = MCE_MAX;
+    if (period < MCE_MIN)
+        period = MCE_MIN;
+    set_timer(&mce_timer, NOW() + MILLISECS(period));
+    adjust = 0;
+}
+
+void intel_mcheck_timer(struct cpuinfo_x86 *c)
+{
+    printk(KERN_DEBUG "mcheck_poll: Init_mcheck_timer\n");
+    init_timer(&mce_timer, mce_intel_work_fn, NULL, 0);
+    set_timer(&mce_timer, NOW() + MILLISECS(MCE_PERIOD));
+}
+
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/non-fatal.c
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c       Wed Dec 24 12:52:34 2008 +0900
@@ -19,8 +19,8 @@
 #include <asm/msr.h>
 
 #include "mce.h"
-
-static int firstbank;
+#include "x86_mca.h"
+int firstbank = 0;
 static struct timer mce_timer;
 
 #define MCE_PERIOD MILLISECS(15000)
@@ -61,13 +61,8 @@ static int __init init_nonfatal_mce_chec
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
        /* Check for MCE support */
-       if (!cpu_has(c, X86_FEATURE_MCE))
+       if (!mce_available(c))
                return -ENODEV;
-
-       /* Check for PPro style MCA */
-       if (!cpu_has(c, X86_FEATURE_MCA))
-               return -ENODEV;
-
        /*
         * Check for non-fatal errors every MCE_RATE s

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>