# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID fdf25330e4a699c4b90aa28cc30843447cf9da61
# Parent 59d4c1863330e2023b05043d450da39cda47bd5a
# Parent f91cc71173c55f18280b12e6732d9e7509d208be
merge with xen-unstable.hg
---
tools/blktap/Makefile | 93
tools/blktap/README | 137 -
tools/blktap/README.sept05 | 33
tools/blktap/blkdump.c | 62
tools/blktap/blkif.c | 212 -
tools/blktap/blktaplib.c | 453 ---
tools/blktap/blktaplib.h | 171 -
tools/blktap/list.h | 55
tools/blktap/parallax/Makefile | 62
tools/blktap/parallax/README | 171 -
tools/blktap/parallax/block-async.c | 393 --
tools/blktap/parallax/block-async.h | 69
tools/blktap/parallax/blockstore.c | 1348
----------
tools/blktap/parallax/blockstore.h | 134
tools/blktap/parallax/blockstored.c | 275 --
tools/blktap/parallax/bstest.c | 191 -
tools/blktap/parallax/parallax.c | 608 ----
tools/blktap/parallax/radix.c | 631 ----
tools/blktap/parallax/radix.h | 45
tools/blktap/parallax/requests-async.c | 762 -----
tools/blktap/parallax/requests-async.h | 29
tools/blktap/parallax/snaplog.c | 238 -
tools/blktap/parallax/snaplog.h | 61
tools/blktap/parallax/vdi.c | 367 --
tools/blktap/parallax/vdi.h | 55
tools/blktap/parallax/vdi_create.c | 52
tools/blktap/parallax/vdi_fill.c | 81
tools/blktap/parallax/vdi_list.c | 47
tools/blktap/parallax/vdi_snap.c | 43
tools/blktap/parallax/vdi_snap_delete.c | 48
tools/blktap/parallax/vdi_snap_list.c | 82
tools/blktap/parallax/vdi_tree.c | 132
tools/blktap/parallax/vdi_unittest.c | 184 -
tools/blktap/parallax/vdi_validate.c | 97
tools/blktap/ublkback/Makefile | 40
tools/blktap/ublkback/ublkback.c | 18
tools/blktap/ublkback/ublkbacklib.c | 473 ---
tools/blktap/ublkback/ublkbacklib.h | 16
tools/blktap/xenbus.c | 568 ----
docs/src/user.tex | 3
linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c | 51
linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c | 193 -
linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c | 19
linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c | 54
linux-2.6-xen-sparse/drivers/xen/Kconfig | 10
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 10
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c | 3
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c | 4
linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile | 2
linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c | 16
linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile | 2
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 4
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h | 23
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 17
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h | 22
patches/linux-2.6.16.13/ipv6-no-autoconf.patch | 20
tools/examples/network-bridge | 36
tools/examples/vif-bridge | 12
tools/examples/xen-network-common.sh | 45
tools/examples/xmexample.hvm | 12
tools/firmware/acpi/Makefile | 7
tools/firmware/acpi/acpi2_0.h | 2
tools/firmware/acpi/acpi_dsdt.asl | 345 ++
tools/firmware/acpi/acpi_dsdt.c | 399 +-
tools/firmware/acpi/acpi_fadt.h | 21
tools/ioemu/hw/pc.c | 8
tools/ioemu/hw/pci.c | 19
tools/ioemu/hw/piix4acpi.c | 481 +++
tools/ioemu/target-i386-dm/Makefile | 2
tools/libxc/xc_domain.c | 11
tools/libxc/xc_linux_restore.c | 66
tools/libxc/xc_linux_save.c | 4
tools/libxc/xenctrl.h | 4
tools/python/xen/lowlevel/xc/xc.c | 31
tools/python/xen/util/SSHTransport.py | 102
tools/python/xen/util/xmlrpclib2.py | 55
tools/python/xen/xend/XendClient.py | 13
tools/python/xen/xend/XendDomainInfo.py | 5
tools/python/xen/xm/create.py | 2
tools/python/xen/xm/main.py | 34
tools/security/secpol_tool.c | 7
tools/xm-test/grouptest/default | 2
tools/xm-test/grouptest/medium | 2
tools/xm-test/lib/XmTestLib/Console.py | 70
tools/xm-test/tests/memset/03_memset_random_pos.py | 6
xen/acm/acm_core.c | 5
xen/acm/acm_policy.c | 45
xen/arch/ia64/linux-xen/smp.c | 36
xen/arch/ia64/xen/domain.c | 4
xen/arch/ia64/xen/xensetup.c | 3
xen/arch/x86/Makefile | 2
xen/arch/x86/audit.c | 4
xen/arch/x86/hvm/vmx/vmcs.c | 17
xen/arch/x86/hvm/vmx/vmx.c | 13
xen/arch/x86/mm.c | 205 +
xen/arch/x86/setup.c | 7
xen/arch/x86/shadow.c | 125
xen/arch/x86/shadow32.c | 12
xen/arch/x86/shadow_guest32pae.c | 2
xen/arch/x86/shadow_public.c | 40
xen/arch/x86/time.c | 2
xen/arch/x86/traps.c | 242 +
xen/arch/x86/x86_32/seg_fixup.c | 2
xen/arch/x86/x86_32/traps.c | 44
xen/arch/x86/x86_64/traps.c | 37
xen/arch/x86/x86_emulate.c | 4
xen/common/acm_ops.c | 2
xen/common/dom0_ops.c | 19
xen/common/domain.c | 2
xen/common/kernel.c | 5
xen/common/keyhandler.c | 20
xen/common/memory.c | 2
xen/common/sched_credit.c | 4
xen/common/sched_sedf.c | 26
xen/common/schedule.c | 4
xen/drivers/char/console.c | 45
xen/include/acm/acm_core.h | 9
xen/include/acm/acm_hooks.h | 18
xen/include/asm-ia64/debugger.h | 8
xen/include/asm-ia64/vmx.h | 1
xen/include/asm-ia64/xenprocessor.h | 2
xen/include/asm-x86/hvm/support.h | 2
xen/include/asm-x86/mm.h | 2
xen/include/asm-x86/processor.h | 15
xen/include/asm-x86/shadow.h | 44
xen/include/asm-x86/shadow_64.h | 36
xen/include/asm-x86/shadow_ops.h | 8
xen/include/public/arch-x86_32.h | 9
xen/include/public/arch-x86_64.h | 14
xen/include/public/dom0_ops.h | 23
xen/include/public/memory.h | 14
xen/include/xen/console.h | 2
xen/include/xen/lib.h | 1
xen/include/xen/sched.h | 1
134 files changed, 2608 insertions(+), 9373 deletions(-)
diff -r 59d4c1863330 -r fdf25330e4a6 docs/src/user.tex
--- a/docs/src/user.tex Fri Jun 23 15:26:01 2006 -0600
+++ b/docs/src/user.tex Fri Jun 23 15:33:25 2006 -0600
@@ -1972,7 +1972,8 @@ editing \path{grub.conf}.
\item [ console=$<$specifier list$>$ ] Specify the destination for Xen
console I/O. This is a comma-separated list of, for example:
\begin{description}
- \item[ vga ] Use VGA console and allow keyboard input.
+ \item[ vga ] Use VGA console (only until domain 0 boots, unless {\bf
+ vga[keep] } is specified).
\item[ com1 ] Use serial port com1.
\item[ com2H ] Use serial port com2. Transmitted chars will have the
MSB set. Received chars must have MSB set.
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Fri Jun 23 15:26:01
2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Fri Jun 23 15:33:25
2006 -0600
@@ -273,6 +273,49 @@ static void dump_fault_path(unsigned lon
}
#endif
+static int spurious_fault(struct pt_regs *regs,
+ unsigned long address,
+ unsigned long error_code)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+#ifdef CONFIG_XEN
+ /* Faults in hypervisor area are never spurious. */
+ if (address >= HYPERVISOR_VIRT_START)
+ return 0;
+#endif
+
+ /* Reserved-bit violation or user access to kernel space? */
+ if (error_code & 0x0c)
+ return 0;
+
+ pgd = init_mm.pgd + pgd_index(address);
+ if (!pgd_present(*pgd))
+ return 0;
+
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ return 0;
+
+ pmd = pmd_offset(pud, address);
+ if (!pmd_present(*pmd))
+ return 0;
+
+ pte = pte_offset_kernel(pmd, address);
+ if (!pte_present(*pte))
+ return 0;
+ if ((error_code & 0x02) && !pte_write(*pte))
+ return 0;
+#ifdef CONFIG_X86_PAE
+ if ((error_code & 0x10) && (pte_val(*pte) & _PAGE_NX))
+ return 0;
+#endif
+
+ return 1;
+}
/*
* This routine handles page faults. It determines the address,
@@ -327,8 +370,16 @@ fastcall void __kprobes do_page_fault(st
* protection error (error_code & 1) == 0.
*/
if (unlikely(address >= TASK_SIZE)) {
+#ifdef CONFIG_XEN
+ /* Faults in hypervisor area can never be patched up. */
+ if (address >= HYPERVISOR_VIRT_START)
+ goto bad_area_nosemaphore;
+#endif
if (!(error_code & 5))
goto vmalloc_fault;
+ /* Can take a spurious fault if mapping changes R/O -> R/W. */
+ if (spurious_fault(regs, address, error_code))
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Jun 23 15:26:01
2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Jun 23 15:33:25
2006 -0600
@@ -263,6 +263,10 @@ static void contiguous_bitmap_clear(
}
}
+/* Protected by balloon_lock. */
+#define MAX_CONTIG_ORDER 7
+static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+
/* Ensure multi-page extents are contiguous in machine memory. */
int xen_create_contiguous_region(
unsigned long vstart, unsigned int order, unsigned int address_bits)
@@ -271,13 +275,23 @@ int xen_create_contiguous_region(
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ unsigned long *in_frames = discontig_frames, out_frame;
unsigned long frame, i, flags;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
+ long rc;
+ int success;
+ struct xen_memory_exchange exchange = {
+ .in = {
+ .nr_extents = 1UL << order,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ },
+ .out = {
+ .nr_extents = 1,
+ .extent_order = order,
+ .address_bits = address_bits,
+ .domid = DOMID_SELF
+ }
};
- set_xen_guest_handle(reservation.extent_start, &frame);
/*
* Currently an auto-translated guest will not perform I/O, nor will
@@ -287,68 +301,73 @@ int xen_create_contiguous_region(
if (xen_feature(XENFEAT_auto_translated_physmap))
return 0;
+ if (order > MAX_CONTIG_ORDER)
+ return -ENOMEM;
+
+ set_xen_guest_handle(exchange.in.extent_start, in_frames);
+ set_xen_guest_handle(exchange.out.extent_start, &out_frame);
+
scrub_pages(vstart, 1 << order);
balloon_lock(flags);
- /* 1. Zap current PTEs, giving away the underlying pages. */
- for (i = 0; i < (1<<order); i++) {
+ /* 1. Zap current PTEs, remembering MFNs. */
+ for (i = 0; i < (1UL<<order); i++) {
pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
- frame = pte_mfn(*pte);
- BUG_ON(HYPERVISOR_update_va_mapping(
- vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
+ in_frames[i] = pte_mfn(*pte);
+ if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
+ __pte_ma(0), 0))
+ BUG();
set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
INVALID_P2M_ENTRY);
- BUG_ON(HYPERVISOR_memory_op(
- XENMEM_decrease_reservation, &reservation) != 1);
}
/* 2. Get a new contiguous memory extent. */
- reservation.extent_order = order;
- reservation.address_bits = address_bits;
- frame = __pa(vstart) >> PAGE_SHIFT;
- if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
- &reservation) != 1)
- goto fail;
+ out_frame = __pa(vstart) >> PAGE_SHIFT;
+ rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+ success = (exchange.nr_exchanged == (1UL << order));
+ BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
+ BUG_ON(success && (rc != 0));
+ if (unlikely(rc == -ENOSYS)) {
+ /* Compatibility when XENMEM_exchange is unsupported. */
+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &exchange.in) != (1UL << order))
+ BUG();
+ success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
+ &exchange.out) == 1);
+ if (!success) {
+ /* Couldn't get special memory: fall back to normal. */
+ for (i = 0; i < (1UL<<order); i++)
+ in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
+ if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
+ &exchange.in) != (1UL<<order))
+ BUG();
+ }
+ }
/* 3. Map the new extent in place of old pages. */
- for (i = 0; i < (1<<order); i++) {
- BUG_ON(HYPERVISOR_update_va_mapping(
- vstart + (i*PAGE_SIZE),
- pfn_pte_ma(frame+i, PAGE_KERNEL), 0));
- set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame+i);
+ for (i = 0; i < (1UL<<order); i++) {
+ frame = success ? (out_frame + i) : in_frames[i];
+ if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
+ pfn_pte_ma(frame,
+ PAGE_KERNEL),
+ 0))
+ BUG();
+ set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
}
flush_tlb_all();
- contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
+ if (success)
+ contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
+ 1UL << order);
balloon_unlock(flags);
- return 0;
-
- fail:
- reservation.extent_order = 0;
- reservation.address_bits = 0;
-
- for (i = 0; i < (1<<order); i++) {
- frame = (__pa(vstart) >> PAGE_SHIFT) + i;
- BUG_ON(HYPERVISOR_memory_op(
- XENMEM_populate_physmap, &reservation) != 1);
- BUG_ON(HYPERVISOR_update_va_mapping(
- vstart + (i*PAGE_SIZE),
- pfn_pte_ma(frame, PAGE_KERNEL), 0));
- set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
- }
-
- flush_tlb_all();
-
- balloon_unlock(flags);
-
- return -ENOMEM;
+ return success ? 0 : -ENOMEM;
}
void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
@@ -357,47 +376,79 @@ void xen_destroy_contiguous_region(unsig
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ unsigned long *out_frames = discontig_frames, in_frame;
unsigned long frame, i, flags;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
+ long rc;
+ int success;
+ struct xen_memory_exchange exchange = {
+ .in = {
+ .nr_extents = 1,
+ .extent_order = order,
+ .domid = DOMID_SELF
+ },
+ .out = {
+ .nr_extents = 1UL << order,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ }
};
- set_xen_guest_handle(reservation.extent_start, &frame);
if (xen_feature(XENFEAT_auto_translated_physmap) ||
!test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
return;
+ if (order > MAX_CONTIG_ORDER)
+ return;
+
+ set_xen_guest_handle(exchange.in.extent_start, &in_frame);
+ set_xen_guest_handle(exchange.out.extent_start, out_frames);
+
scrub_pages(vstart, 1 << order);
balloon_lock(flags);
contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
- /* 1. Zap current PTEs, giving away the underlying pages. */
- for (i = 0; i < (1<<order); i++) {
- pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
- pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
- pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
- pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
- frame = pte_mfn(*pte);
- BUG_ON(HYPERVISOR_update_va_mapping(
- vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
+ /* 1. Find start MFN of contiguous extent. */
+ pgd = pgd_offset_k(vstart);
+ pud = pud_offset(pgd, vstart);
+ pmd = pmd_offset(pud, vstart);
+ pte = pte_offset_kernel(pmd, vstart);
+ in_frame = pte_mfn(*pte);
+
+ /* 2. Zap current PTEs. */
+ for (i = 0; i < (1UL<<order); i++) {
+ if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
+ __pte_ma(0), 0));
set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
INVALID_P2M_ENTRY);
- BUG_ON(HYPERVISOR_memory_op(
- XENMEM_decrease_reservation, &reservation) != 1);
- }
-
- /* 2. Map new pages in place of old pages. */
- for (i = 0; i < (1<<order); i++) {
- frame = (__pa(vstart) >> PAGE_SHIFT) + i;
- BUG_ON(HYPERVISOR_memory_op(
- XENMEM_populate_physmap, &reservation) != 1);
- BUG_ON(HYPERVISOR_update_va_mapping(
- vstart + (i*PAGE_SIZE),
- pfn_pte_ma(frame, PAGE_KERNEL), 0));
+ out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
+ }
+
+ /* 3. Do the exchange for non-contiguous MFNs. */
+ rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+ success = (exchange.nr_exchanged == 1);
+ BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
+ BUG_ON(success && (rc != 0));
+ if (rc == -ENOSYS) {
+ /* Compatibility when XENMEM_exchange is unsupported. */
+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &exchange.in) != 1)
+ BUG();
+ if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
+ &exchange.out) != (1UL << order))
+ BUG();
+ success = 1;
+ }
+
+ /* 4. Map new pages in place of old pages. */
+ for (i = 0; i < (1UL<<order); i++) {
+ frame = success ? out_frames[i] : (in_frame + i);
+ if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
+ pfn_pte_ma(frame,
+ PAGE_KERNEL),
+ 0))
+ BUG();
set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
}
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c Fri Jun 23
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c Fri Jun 23
15:33:25 2006 -0600
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/percpu.h>
+#include <linux/module.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -92,8 +93,16 @@ static void __init setup_boot_cpu_data(v
boot_cpu_data.x86_mask = eax & 0xf;
}
+#include <xen/interface/memory.h>
+unsigned long *machine_to_phys_mapping;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
void __init x86_64_start_kernel(char * real_mode_data)
{
+ struct xen_machphys_mapping mapping;
+ unsigned long machine_to_phys_nr_ents;
char *s;
int i;
@@ -104,6 +113,16 @@ void __init x86_64_start_kernel(char * r
start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
xen_start_info->nr_pt_frames;
}
+
+
+ machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
+ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+ if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+ machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+ machine_to_phys_nr_ents = mapping.max_mfn + 1;
+ }
+ while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
+ machine_to_phys_order++;
#if 0
for (i = 0; i < 256; i++)
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c Fri Jun 23 15:26:01
2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c Fri Jun 23 15:33:25
2006 -0600
@@ -307,6 +307,49 @@ int exception_trace = 1;
#define MEM_LOG(_f, _a...) ((void)0)
#endif
+static int spurious_fault(struct pt_regs *regs,
+ unsigned long address,
+ unsigned long error_code)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+#ifdef CONFIG_XEN
+ /* Faults in hypervisor area are never spurious. */
+ if ((address >= HYPERVISOR_VIRT_START) &&
+ (address < HYPERVISOR_VIRT_END))
+ return 0;
+#endif
+
+ /* Reserved-bit violation or user access to kernel space? */
+ if (error_code & (PF_RSVD|PF_USER))
+ return 0;
+
+ pgd = init_mm.pgd + pgd_index(address);
+ if (!pgd_present(*pgd))
+ return 0;
+
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ return 0;
+
+ pmd = pmd_offset(pud, address);
+ if (!pmd_present(*pmd))
+ return 0;
+
+ pte = pte_offset_kernel(pmd, address);
+ if (!pte_present(*pte))
+ return 0;
+ if ((error_code & PF_WRITE) && !pte_write(*pte))
+ return 0;
+ if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX))
+ return 0;
+
+ return 1;
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -361,16 +404,19 @@ asmlinkage void __kprobes do_page_fault(
*/
if (unlikely(address >= TASK_SIZE64)) {
/*
- * Must check for the entire kernel range here: with writable
- * page tables the hypervisor may temporarily clear PMD
- * entries.
+ * Don't check for the module range here: its PML4
+ * is always initialized because it's shared with the main
+ * kernel text. Only vmalloc may need PML4 syncups.
*/
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
- address >= PAGE_OFFSET) {
+ ((address >= VMALLOC_START && address < VMALLOC_END))) {
if (vmalloc_fault(address) < 0)
goto bad_area_nosemaphore;
return;
}
+ /* Can take a spurious fault if mapping changes R/O -> R/W. */
+ if (spurious_fault(regs, address, error_code))
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Fri Jun 23 15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Fri Jun 23 15:33:25 2006 -0600
@@ -27,6 +27,11 @@ config XEN_UNPRIVILEGED_GUEST
config XEN_UNPRIVILEGED_GUEST
bool
default !XEN_PRIVILEGED_GUEST
+
+config XEN_PRIVCMD
+ bool
+ depends on PROC_FS
+ default y
config XEN_BACKEND
tristate "Backend driver support"
@@ -84,6 +89,11 @@ config XEN_BLKDEV_BACKEND
block devices to other guests via a high-performance shared-memory
interface.
+config XEN_XENBUS_DEV
+ bool
+ depends on PROC_FS
+ default y
+
config XEN_NETDEV_BACKEND
tristate "Network-device backend driver"
depends on XEN_BACKEND && NET
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Jun 23
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Jun 23
15:33:25 2006 -0600
@@ -58,7 +58,9 @@
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *balloon_pde;
+#endif
static DECLARE_MUTEX(balloon_mutex);
@@ -403,6 +405,7 @@ static int balloon_init_watcher(struct n
return NOTIFY_DONE;
}
+#ifdef CONFIG_PROC_FS
static int balloon_write(struct file *file, const char __user *buffer,
unsigned long count, void *data)
{
@@ -456,6 +459,7 @@ static int balloon_read(char *page, char
*eof = 1;
return len;
}
+#endif
static struct notifier_block xenstore_notifier;
@@ -464,10 +468,10 @@ static int __init balloon_init(void)
unsigned long pfn;
struct page *page;
- IPRINTK("Initialising balloon driver.\n");
-
if (!is_running_on_xen())
return -ENODEV;
+
+ IPRINTK("Initialising balloon driver.\n");
current_pages = min(xen_start_info->nr_pages, max_pfn);
totalram_pages = current_pages;
@@ -481,6 +485,7 @@ static int __init balloon_init(void)
balloon_timer.data = 0;
balloon_timer.function = balloon_alarm;
+#ifdef CONFIG_PROC_FS
if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
WPRINTK("Unable to create /proc/xen/balloon.\n");
return -1;
@@ -488,6 +493,7 @@ static int __init balloon_init(void)
balloon_pde->read_proc = balloon_read;
balloon_pde->write_proc = balloon_write;
+#endif
/* Initialise the balloon with excess memory space. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c Fri Jun 23
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c Fri Jun 23
15:33:25 2006 -0600
@@ -109,6 +109,9 @@ static int __init setup_vcpu_hotplug_eve
static struct notifier_block xsn_cpu = {
.notifier_call = setup_cpu_watcher };
+ if (!is_running_on_xen())
+ return -ENODEV;
+
register_cpu_notifier(&hotplug_cpu);
register_xenstore_notifier(&xsn_cpu);
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Fri Jun 23 15:26:01
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Fri Jun 23 15:33:25
2006 -0600
@@ -666,6 +666,10 @@ int irq_ignore_unhandled(unsigned int ir
int irq_ignore_unhandled(unsigned int irq)
{
struct physdev_irq_status_query irq_status = { .irq = irq };
+
+ if (!is_running_on_xen())
+ return 0;
+
(void)HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
return !!(irq_status.flags & XENIRQSTAT_shared);
}
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile Fri Jun 23 15:26:01
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile Fri Jun 23 15:33:25
2006 -0600
@@ -1,2 +1,2 @@
-obj-y := privcmd.o
+obj-$(CONFIG_XEN_PRIVCMD) := privcmd.o
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Fri Jun 23
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Fri Jun 23
15:33:25 2006 -0600
@@ -71,8 +71,6 @@ static int packet_read_shmem(struct pack
char *buffer, int isuserbuffer, u32 left);
static int vtpm_queue_packet(struct packet *pak);
-#define MIN(x,y) (x) < (y) ? (x) : (y)
-
/***************************************************************
Buffer copying fo user and kernel space buffes.
***************************************************************/
@@ -309,7 +307,7 @@ int _packet_write(struct packet *pak,
return 0;
}
- tocopy = MIN(size - offset, PAGE_SIZE);
+ tocopy = min_t(size_t, size - offset, PAGE_SIZE);
if (copy_from_buffer((void *)(MMAP_VADDR(tpmif, i) |
(tx->addr & ~PAGE_MASK)),
@@ -365,7 +363,7 @@ static int packet_read(struct packet *pa
u32 instance_no = htonl(pak->tpm_instance);
u32 last_read = pak->last_read;
- to_copy = MIN(4 - last_read, numbytes);
+ to_copy = min_t(size_t, 4 - last_read, numbytes);
if (copy_to_buffer(&buffer[0],
&(((u8 *) & instance_no)[last_read]),
@@ -384,7 +382,7 @@ static int packet_read(struct packet *pa
if (room_left > 0) {
if (pak->data_buffer) {
- u32 to_copy = MIN(pak->data_len - offset, room_left);
+ u32 to_copy = min_t(u32, pak->data_len - offset,
room_left);
u32 last_read = pak->last_read - 4;
if (copy_to_buffer(&buffer[offset],
@@ -424,7 +422,7 @@ static int packet_read_shmem(struct pack
* and within that page at offset 'offset'.
* Copy a maximum of 'room_left' bytes.
*/
- to_copy = MIN(PAGE_SIZE - pg_offset, room_left);
+ to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left);
while (to_copy > 0) {
void *src;
struct gnttab_map_grant_ref map_op;
@@ -451,7 +449,7 @@ static int packet_read_shmem(struct pack
/*
* User requests more than what's available
*/
- to_copy = MIN(tx->size, to_copy);
+ to_copy = min_t(u32, tx->size, to_copy);
}
DPRINTK("Copying from mapped memory at %08lx\n",
@@ -483,7 +481,7 @@ static int packet_read_shmem(struct pack
last_read += to_copy;
room_left -= to_copy;
- to_copy = MIN(PAGE_SIZE, room_left);
+ to_copy = min_t(u32, PAGE_SIZE, room_left);
i++;
} /* while (to_copy > 0) */
/*
@@ -545,7 +543,7 @@ static ssize_t vtpm_op_read(struct file
DPRINTK("size given by app: %d, available: %d\n", size, left);
- ret_size = MIN(size, left);
+ ret_size = min_t(size_t, size, left);
ret_size = packet_read(pak, ret_size, data, size, 1);
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Fri Jun 23 15:26:01
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Fri Jun 23 15:33:25
2006 -0600
@@ -9,4 +9,4 @@ xenbus-objs += xenbus_comms.o
xenbus-objs += xenbus_comms.o
xenbus-objs += xenbus_xs.o
xenbus-objs += xenbus_probe.o
-xenbus-objs += xenbus_dev.o
+obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Jun 23
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Jun 23
15:33:25 2006 -0600
@@ -926,6 +926,7 @@ void xenbus_probe(void *unused)
}
+#ifdef CONFIG_PROC_FS
static struct file_operations xsd_kva_fops;
static struct proc_dir_entry *xsd_kva_intf;
static struct proc_dir_entry *xsd_port_intf;
@@ -964,6 +965,7 @@ static int xsd_port_read(char *page, cha
*eof = 1;
return len;
}
+#endif
static int __init xenbus_probe_init(void)
@@ -1008,6 +1010,7 @@ static int __init xenbus_probe_init(void
BUG_ON(err);
xen_start_info->store_evtchn = alloc_unbound.port;
+#ifdef CONFIG_PROC_FS
/* And finally publish the above info in /proc/xen */
xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
if (xsd_kva_intf) {
@@ -1020,6 +1023,7 @@ static int __init xenbus_probe_init(void
xsd_port_intf = create_xen_proc_entry("xsd_port", 0400);
if (xsd_port_intf)
xsd_port_intf->read_proc = xsd_port_read;
+#endif
} else
xenstored_ready = 1;
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Jun 23
15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Jun 23
15:33:25 2006 -0600
@@ -67,6 +67,10 @@
extern unsigned long *phys_to_machine_mapping;
+#undef machine_to_phys_mapping
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int machine_to_phys_order;
+
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
@@ -84,24 +88,29 @@ static inline int phys_to_machine_mappin
static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
+ extern unsigned long max_mapnr;
unsigned long pfn;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
- /*
- * The array access can fail (e.g., device space beyond end of RAM).
- * In such cases it doesn't matter what we return (we return garbage),
- * but we must handle the fault without crashing!
- */
+ if (unlikely((mfn >> machine_to_phys_order) != 0))
+ return max_mapnr;
+
+ /* The array access can fail (e.g., device space beyond end of RAM). */
asm (
"1: movl %1,%0\n"
"2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %2,%0\n"
+ " jmp 2b\n"
+ ".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
- " .long 1b,2b\n"
+ " .long 1b,3b\n"
".previous"
- : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+ : "=r" (pfn)
+ : "m" (machine_to_phys_mapping[mfn]), "m" (max_mapnr) );
return pfn;
}
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Jun
23 15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Jun
23 15:33:25 2006 -0600
@@ -7,6 +7,7 @@
**/
#include <xen/interface/callback.h>
+#include <xen/interface/memory.h>
static char * __init machine_specific_memory_setup(void)
{
@@ -44,9 +45,16 @@ extern void failsafe_callback(void);
extern void failsafe_callback(void);
extern void nmi(void);
+unsigned long *machine_to_phys_mapping;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
static void __init machine_specific_arch_setup(void)
{
int ret;
+ struct xen_machphys_mapping mapping;
+ unsigned long machine_to_phys_nr_ents;
struct xen_platform_parameters pp;
struct callback_register event = {
.type = CALLBACKTYPE_event,
@@ -81,4 +89,13 @@ static void __init machine_specific_arch
if (HYPERVISOR_xen_version(XENVER_platform_parameters,
&pp) == 0)
set_fixaddr_top(pp.virt_start - PAGE_SIZE);
+
+ machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
+ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+ if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+ machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+ machine_to_phys_nr_ents = mapping.max_mfn + 1;
+ }
+ while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
+ machine_to_phys_order++;
}
diff -r 59d4c1863330 -r fdf25330e4a6
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Fri Jun
23 15:26:01 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Fri Jun
23 15:33:25 2006 -0600
@@ -85,6 +85,10 @@ void copy_page(void *, void *);
extern unsigned long *phys_to_machine_mapping;
+#undef machine_to_phys_mapping
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int machine_to_phys_order;
+
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
@@ -107,19 +111,23 @@ static inline unsigned long mfn_to_pfn(u
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
- /*
- * The array access can fail (e.g., device space beyond end of RAM).
- * In such cases it doesn't matter what we return (we return garbage),
- * but we must handle the fault without crashing!
- */
+ if (unlikely((mfn >> machine_to_phys_order) != 0))
+ return end_pfn;
+
+ /* The array access can fail (e.g., device space beyond end of RAM). */
asm (
"1: movq %1,%0\n"
"2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movq %2,%0\n"
+ " jmp 2b\n"
+ ".previous\n"
".section __ex_table,\"a\"\n"
" .align 8\n"
- " .quad 1b,2b\n"
+ " .quad 1b,3b\n"
".previous"
- : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+ : "=r" (pfn)
+ : "m" (machine_to_phys_mapping[mfn]), "m" (end_pfn) );
return pfn;
}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/network-bridge
--- a/tools/examples/network-bridge Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/examples/network-bridge Fri Jun 23 15:33:25 2006 -0600
@@ -151,30 +151,6 @@ link_exists()
fi
}
-
-# Usage: create_bridge bridge
-create_bridge () {
- local bridge=$1
-
- # Don't create the bridge if it already exists.
- if ! brctl show | grep -q ${bridge} ; then
- brctl addbr ${bridge}
- brctl stp ${bridge} off
- brctl setfd ${bridge} 0
- fi
- ip link set ${bridge} up
-}
-
-# Usage: add_to_bridge bridge dev
-add_to_bridge () {
- local bridge=$1
- local dev=$2
- # Don't add $dev to $bridge if it's already on a bridge.
- if ! brctl show | grep -q ${dev} ; then
- brctl addif ${bridge} ${dev}
- fi
-}
-
# Set the default forwarding policy for $dev to drop.
# Allow forwarding to the bridge.
antispoofing () {
@@ -238,14 +214,13 @@ using loopback.nloopbacks=<N> on the dom
fi
ip link set ${netdev} name ${pdev}
ip link set ${vdev} name ${netdev}
- ip link set ${pdev} down arp off
- ip link set ${pdev} addr fe:ff:ff:ff:ff:ff
- ip addr flush ${pdev}
+
+ setup_bridge_port ${pdev}
+ setup_bridge_port ${vif0}
ip link set ${netdev} addr ${mac} arp on
- add_to_bridge ${bridge} ${vif0}
+
ip link set ${bridge} up
- ip link set ${vif0} up
- ip link set ${pdev} up
+ add_to_bridge ${bridge} ${vif0}
add_to_bridge2 ${bridge} ${pdev}
do_ifup ${netdev}
else
@@ -301,6 +276,7 @@ add_to_bridge2() {
local maxtries=10
echo -n "Waiting for ${dev} to negotiate link."
+ ip link set ${dev} up
for i in `seq ${maxtries}` ; do
if ifconfig ${dev} | grep -q RUNNING ; then
break
diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/vif-bridge
--- a/tools/examples/vif-bridge Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/examples/vif-bridge Fri Jun 23 15:33:25 2006 -0600
@@ -48,16 +48,8 @@ fi
case "$command" in
online)
- if brctl show | grep -q "$vif"
- then
- log debug "$vif already attached to a bridge"
- exit 0
- fi
-
- brctl addif "$bridge" "$vif" ||
- fatal "brctl addif $bridge $vif failed"
-
- ifconfig "$vif" up || fatal "ifconfig $vif up failed"
+ setup_bridge_port "$vif"
+ add_to_bridge "$bridge" "$vif"
;;
offline)
diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/xen-network-common.sh
--- a/tools/examples/xen-network-common.sh Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/examples/xen-network-common.sh Fri Jun 23 15:33:25 2006 -0600
@@ -104,3 +104,48 @@ find_dhcpd_init_file()
{
first_file -x /etc/init.d/{dhcp3-server,dhcp,dhcpd}
}
+
+# Usage: setup_bridge_port dev
+#
+# Turn an interface into a silent, address-less bridge port:
+#   - no ARP and no multicast (keeps IPv6 autoconf quiet)
+#   - MAC address forced to fe:ff:ff:ff:ff:ff
+#   - all IP addresses flushed
+# The interface is taken down first and is NOT brought back up here.
+setup_bridge_port() {
+    local port="$1"
+    local setting
+
+    # Take the interface down before reconfiguring it.
+    ip link set ${port} down
+
+    # Apply the link settings one at a time, in this order.  ${setting}
+    # is deliberately unquoted so each entry splits into the separate
+    # words that "ip link set" expects.
+    for setting in "arp off" "multicast off" "addr fe:ff:ff:ff:ff:ff"; do
+        ip link set ${port} ${setting}
+    done
+    ip addr flush ${port}
+}
+
+# Usage: create_bridge bridge
+#
+# Make sure the bridge device <bridge> exists, then bring it up.
+# A newly created bridge gets STP disabled, a forwarding delay of 0,
+# and ARP and multicast switched off.  An existing bridge is only
+# brought up; its settings are left alone.
+create_bridge () {
+    local br=$1
+
+    # The presence of /sys/class/net/<name>/bridge is used as the
+    # "this bridge already exists" test.
+    if ! [ -e "/sys/class/net/${br}/bridge" ]; then
+        brctl addbr ${br}
+        brctl stp ${br} off
+        brctl setfd ${br} 0
+        ip link set ${br} arp off
+        ip link set ${br} multicast off
+    fi
+    ip link set ${br} up
+}
+
+# Usage: add_to_bridge bridge dev
+add_to_bridge () {
+ local bridge=$1
+ local dev=$2
+
+ # Don't add $dev to $bridge if it's already on a bridge.
+ if [ -e "/sys/class/net/${bridge}/brif/${dev}" ]; then
+ return
+ fi
+ brctl addif ${bridge} ${dev}
+ ip link set ${dev} up
+}
+
diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/examples/xmexample.hvm Fri Jun 23 15:33:25 2006 -0600
@@ -164,3 +164,15 @@ ne2000=0
#-----------------------------------------------------------------------------
# start in full screen
#full-screen=1
+
+
+#-----------------------------------------------------------------------------
+# Enable USB support (specific devices specified at runtime through the
+# monitor window)
+#usb=1
+
+# Enable USB mouse support (only enable one of the following, `mouse' for
+# PS/2 protocol relative mouse, `tablet' for
+# absolute mouse)
+#usbdevice='mouse'
+#usbdevice='tablet'
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/Makefile
--- a/tools/firmware/acpi/Makefile Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/Makefile Fri Jun 23 15:33:25 2006 -0600
@@ -33,17 +33,16 @@ IASL_URL=http://developer.intel.com/tech
IASL_URL=http://developer.intel.com/technology/iapc/acpi/downloads/$(IASL_VER).tar.gz
vpath iasl $(PATH)
-.PHONY: all
all:$(ACPI_BIN)
acpi_dsdt.c:acpi_dsdt.asl
$(MAKE) iasl
- iasl -oa -tc acpi_dsdt.asl
+ iasl -tc acpi_dsdt.asl
mv acpi_dsdt.hex acpi_dsdt.c
echo "int DsdtLen=sizeof(AmlCode);" >> acpi_dsdt.c
rm *.aml
+# iasl -oa -tc acpi_dsdt.asl
-.PHONY: iasl
iasl:
@echo
@echo "ACPI ASL compiler(iasl) is needed"
@@ -62,10 +61,8 @@ iasl:
$(ACPI_BIN):$(ACPI_GEN)
./$(ACPI_GEN) $(ACPI_BIN)
-.PHONY: clean
clean:
rm -rf *.o $(ACPI_GEN) $(ACPI_BIN) $(IASL_VER)
rm -rf $(IASL_VER).tar.gz
-.PHONY: install
install: all
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi2_0.h
--- a/tools/firmware/acpi/acpi2_0.h Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/acpi2_0.h Fri Jun 23 15:33:25 2006 -0600
@@ -323,7 +323,7 @@ typedef struct {
// The physical that acpi table reside in the guest BIOS
//#define ACPI_PHYSICAL_ADDRESS 0xE2000
#define ACPI_PHYSICAL_ADDRESS 0xEA000
-#define ACPI_TABLE_SIZE (2*1024) //Currently 2K is enough
+#define ACPI_TABLE_SIZE (4*1024) //Currently 4K is enough
void
AcpiBuildTable(uint8_t* buf);
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi_dsdt.asl
--- a/tools/firmware/acpi/acpi_dsdt.asl Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/acpi_dsdt.asl Fri Jun 23 15:33:25 2006 -0600
@@ -20,7 +20,7 @@
//**
//**
-DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL ", "XEN ", 2)
+DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL","int-xen", 2006)
{
Name (\PMBS, 0x0C00)
Name (\PMLN, 0x08)
@@ -29,24 +29,33 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1,
Name (\APCB, 0xFEC00000)
Name (\APCL, 0x00010000)
Name (\PUID, 0x00)
+
Scope (\_PR)
{
Processor (CPU0, 0x00, 0x00000000, 0x00) {}
Processor (CPU1, 0x01, 0x00000000, 0x00) {}
Processor (CPU2, 0x02, 0x00000000, 0x00) {}
Processor (CPU3, 0x03, 0x00000000, 0x00) {}
+
}
/* Poweroff support - ties in with qemu emulation */
Name (\_S5, Package (0x04)
{
- 0x07,
- 0x07,
- 0x00,
+ 0x07,
+ 0x07,
+ 0x00,
0x00
})
+
+ Name(PICD, 0)
+
+ Method(_PIC, 1) {
+
+ Store(Arg0, PICD)
+ }
Scope (\_SB)
{
Device (PCI0)
@@ -55,9 +64,20 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1,
Name (_UID, 0x00)
Name (_ADR, 0x00)
Name (_BBN, 0x00)
+ OperationRegion (PIRP, PCI_Config, 0x3c, 0x10)
+ Field(PIRP, ByteAcc, NoLock, Preserve){
+ IRQ3,3,
+ IRQ5,5,
+ IRQ7,7,
+ IRQ9,9,
+ IRQA,10,
+ IRQB,11
+ }
+
Method (_CRS, 0, NotSerialized)
{
- Name (PRT0, ResourceTemplate ()
+
+ Name (PRT0, ResourceTemplate ()
{
/* bus number is from 0 - 255*/
WordBusNumber (ResourceConsumer, MinFixed, MaxFixed,
SubDecode,
@@ -79,75 +99,270 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1,
0x0FFF,
0x0000,
0x0300)
+
+ /* reserve what device model consumed for IDE and acpi pci
device */
+ WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode,
EntireRange,
+ 0x0000,
+ 0xc000,
+ 0xc01f,
+ 0x0000,
+ 0x0020)
+ /* Reserve the I/O ports the device model consumes for the Ethernet
+  * controller PCI device: 0xc020-0xc03f.  Both ends of the range are
+  * fixed, so the length has to be max - min + 1 = 0x20. */
+ WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode, EntireRange,
+     0x0000,     /* address granularity */
+     0xc020,     /* range minimum */
+     0xc03f,     /* range maximum */
+     0x0000,     /* translation offset */
+     0x0020)     /* range length */
+
DWordMemory (ResourceProducer, PosDecode, MinFixed,
MaxFixed, Cacheable, ReadOnly,
0x00000000,
- 0x000A0000,
+ 0x000c0000,
0x000FFFFF,
0x00000000,
- 0x00060000)
+ 0x00030000)
+
+ /* reserve what device model consumed for PCI VGA device
*/
+
+ DWordMemory (ResourceConsumer, PosDecode, MinFixed,
MaxFixed, Cacheable, ReadWrite,
+ 0x00000000,
+ 0xF0000000,
+ 0xF1FFFFFF,
+ 0x00000000,
+ 0x02000000)
+ DWordMemory (ResourceConsumer, PosDecode, MinFixed,
MaxFixed, Cacheable, ReadWrite,
+ 0x00000000,
+ 0xF2000000,
+ 0xF2000FFF,
+ 0x00000000,
+ 0x00001000)
+ /* reserve what device model consumed for Ethernet controller
pci device */
+ DWordMemory (ResourceConsumer, PosDecode, MinFixed,
MaxFixed, Cacheable, ReadWrite,
+ 0x00000000,
+ 0xF2001000,
+ 0xF200101F,
+ 0x00000000,
+ 0x00000020)
})
Return (PRT0)
}
-
- Name (AIR0, Package (0x06)
- {
- Package (0x04)
- {
- 0x001FFFFF,
- 0x02,
- 0x00,
- 0x17
- },
-
- Package (0x04)
- {
- 0x001FFFFF,
- 0x03,
- 0x00,
- 0x13
- },
-
- Package (0x04)
- {
- 0x001DFFFF,
- 0x01,
- 0x00,
- 0x13
- },
-
- Package (0x04)
- {
- 0x001DFFFF,
- 0x00,
- 0x00,
- 0x10
- },
-
- Package (0x04)
- {
- 0x001DFFFF,
- 0x02,
- 0x00,
- 0x12
- },
-
- Package (0x04)
- {
- 0x001DFFFF,
- 0x03,
- 0x00,
- 0x17
- }
- })
- Method (_PRT, 0, NotSerialized)
- {
- Return (AIR0)
- }
-
+ // IRQs that every PCI interrupt link offers; returned by the links'
+ // _PRS methods.
+ Name(BUFA, ResourceTemplate() {
+     IRQ(Level, ActiveLow, Shared) {
+         3,4,5,6,7,10,11,12,14,15}
+ })
+
+ // Scratch resource buffer returned by the links' _CRS methods.  It has
+ // the same byte layout as the compiled BUFA (0x23 IRQ descriptor, two
+ // mask bytes, flags 0x18, 0x79 end tag) but starts with an empty mask.
+ Name(BUFB, Buffer(){
+     0x23, 0x00, 0x00, 0x18,
+     0x79, 0})
+
+ // IRQV overlays the two IRQ-mask bytes (offsets 1-2) of BUFB.
+ CreateWordField(BUFB, 0x01, IRQV)
+
+ // One byte per link, A through D.
+ Name(BUFC, Buffer(){
+     5, 7, 10, 11
+ })
+
+ // Give each link its own byte of BUFC.  All four fields used to be
+ // created at offset 0x01 and therefore aliased the same byte.
+ CreateByteField(BUFC, 0x00, PIQA)
+ CreateByteField(BUFC, 0x01, PIQB)
+ CreateByteField(BUFC, 0x02, PIQC)
+ CreateByteField(BUFC, 0x03, PIQD)
+
+ // PCI interrupt link device A.  Every method operates on PIRA, the
+ // 8-bit field declared in the PIRQ PCI_Config region of \_SB.PCI0.ISA.
+ Device(LNKA) {
+     Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link
+     Name(_UID, 1)
+
+     // _STA: 0x09 when bit 7 (0x80) of PIRA is set, otherwise 0x0B.
+     // NOTE(review): in the ACPI _STA bit layout 0x0B adds the
+     // "enabled" bit, so bit 7 presumably means "routing disabled".
+     Method(_STA, 0) {
+         And(PIRA, 0x80, Local0)
+         If(LEqual(Local0, 0x80)) {
+             Return(0x09)
+         }
+         Else {
+             Return(0x0B)
+         }
+     }
+
+     // _PRS: the possible settings are the shared IRQ list in BUFA.
+     Method(_PRS) {
+         Return(BUFA)
+     } // Method(_PRS)
+
+     // _DIS: set bit 7 of PIRA, leaving the other bits unchanged.
+     Method(_DIS) {
+         Or(PIRA, 0x80, PIRA)
+     }
+
+     // _CRS: take the low four bits of PIRA as an IRQ number, set only
+     // that bit in IRQV (the mask word of BUFB) and return BUFB.
+     // This must read PIRA; reading PIRB reported link B's routing.
+     Method(_CRS) {
+         And(PIRA, 0x0f, Local0)
+         ShiftLeft(0x1, Local0, IRQV)
+         Return(BUFB)
+     }
+
+     // _SRS: Arg0 is a resource buffer whose IRQ mask word sits at
+     // offset 1.  FindSetRightBit gives the 1-based position of the
+     // lowest set bit, so after the decrement Local0 is the IRQ number,
+     // which is written to PIRA as the whole byte.
+     Method(_SRS, 1) {
+         CreateWordField(ARG0, 0x01, IRQ1)
+         FindSetRightBit(IRQ1, Local0)
+         Decrement(Local0)
+         Store(Local0, PIRA)
+     } // Method(_SRS)
+ }
+
+ // PCI interrupt link device B.  Every method operates on PIRB, the
+ // 8-bit field declared in the PIRQ PCI_Config region of \_SB.PCI0.ISA.
+ Device(LNKB) {
+ Name(_HID, EISAID("PNP0C0F"))
+ Name(_UID, 2)
+ // _STA: 0x09 when bit 7 (0x80) of PIRB is set, otherwise 0x0B.
+ // NOTE(review): in the ACPI _STA bit layout 0x0B adds the "enabled"
+ // bit, so bit 7 presumably means "routing disabled" - confirm.
+ Method(_STA, 0) {
+ And(PIRB, 0x80, Local0)
+ If(LEqual(Local0, 0x80)) {
+ Return(0x09)
+ }
+ Else {
+ Return(0x0B)
+ }
+ }
+
+ // _PRS: the possible settings are the shared IRQ list in BUFA.
+ Method(_PRS) {
+ Return(BUFA)
+ } // Method(_PRS)
+
+ // _DIS: set bit 7 of PIRB, leaving the other bits unchanged.
+ Method(_DIS) {
+
+ Or(PIRB, 0x80, PIRB)
+ }
+
+ // _CRS: take the low four bits of PIRB as an IRQ number, set only
+ // that bit in IRQV (the word field at offset 1 of BUFB) and return
+ // BUFB.  BUFB is shared by all four link devices.
+ Method(_CRS) {
+ And(PIRB, 0x0f, Local0)
+ ShiftLeft(0x1, Local0, IRQV)
+ Return(BUFB)
+ } // Method(_CRS)
+
+ // _SRS: reads the word at offset 1 of the buffer passed in Arg0.
+ // FindSetRightBit gives the 1-based position of the lowest set bit,
+ // so after the decrement Local0 is that bit's index; it is written
+ // to PIRB as the whole byte.
+ Method(_SRS, 1) {
+ CreateWordField(ARG0, 0x01, IRQ1)
+ FindSetRightBit(IRQ1, Local0)
+ Decrement(Local0)
+ Store(Local0, PIRB)
+ } // Method(_SRS)
+ }
+
+ // PCI interrupt link device C.  Every method operates on PIRC, the
+ // 8-bit field declared in the PIRQ PCI_Config region of \_SB.PCI0.ISA.
+ Device(LNKC) {
+ Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link
+ Name(_UID, 3)
+ // _STA: 0x09 when bit 7 (0x80) of PIRC is set, otherwise 0x0B.
+ // NOTE(review): in the ACPI _STA bit layout 0x0B adds the "enabled"
+ // bit, so bit 7 presumably means "routing disabled" - confirm.
+ Method(_STA, 0) {
+ And(PIRC, 0x80, Local0)
+ If(LEqual(Local0, 0x80)) {
+ Return(0x09)
+ }
+ Else {
+ Return(0x0B)
+ }
+ }
+
+ // _PRS: the possible settings are the shared IRQ list in BUFA.
+ Method(_PRS) {
+ Return(BUFA)
+ } // Method(_PRS)
+
+ // _DIS: set bit 7 of PIRC, leaving the other bits unchanged.
+ Method(_DIS) {
+
+ Or(PIRC, 0x80, PIRC)
+ }
+
+ // _CRS: take the low four bits of PIRC as an IRQ number, set only
+ // that bit in IRQV (the word field at offset 1 of BUFB) and return
+ // BUFB.  BUFB is shared by all four link devices.
+ Method(_CRS) {
+ And(PIRC, 0x0f, Local0)
+ ShiftLeft(0x1, Local0, IRQV)
+ Return(BUFB)
+ } // Method(_CRS)
+
+ // _SRS: reads the word at offset 1 of the buffer passed in Arg0.
+ // FindSetRightBit gives the 1-based position of the lowest set bit,
+ // so after the decrement Local0 is that bit's index; it is written
+ // to PIRC as the whole byte.
+ Method(_SRS, 1) {
+ CreateWordField(ARG0, 0x01, IRQ1)
+ FindSetRightBit(IRQ1, Local0)
+ Decrement(Local0)
+ Store(Local0, PIRC)
+ } // Method(_SRS)
+ }
+
+ // PCI interrupt link device D.  Every method operates on PIRD, the
+ // 8-bit field declared in the PIRQ PCI_Config region of \_SB.PCI0.ISA.
+ Device(LNKD) {
+ Name(_HID, EISAID("PNP0C0F"))
+ Name(_UID, 4)
+ // _STA: 0x09 when bit 7 (0x80) of PIRD is set, otherwise 0x0B.
+ // NOTE(review): in the ACPI _STA bit layout 0x0B adds the "enabled"
+ // bit, so bit 7 presumably means "routing disabled" - confirm.
+ Method(_STA, 0) {
+ And(PIRD, 0x80, Local0)
+ If(LEqual(Local0, 0x80)) {
+ Return(0x09)
+ }
+ Else {
+ Return(0x0B)
+ }
+ }
+
+ // _PRS: the possible settings are the shared IRQ list in BUFA.
+ Method(_PRS) {
+ Return(BUFA)
+ } // Method(_PRS)
+
+ // _DIS: set bit 7 of PIRD, leaving the other bits unchanged.
+ Method(_DIS) {
+ Or(PIRD, 0x80, PIRD)
+ }
+
+ // _CRS: take the low four bits of PIRD as an IRQ number, set only
+ // that bit in IRQV (the word field at offset 1 of BUFB) and return
+ // BUFB.  BUFB is shared by all four link devices.
+ Method(_CRS) {
+ And(PIRD, 0x0f, Local0)
+ ShiftLeft(0x1, Local0, IRQV)
+ Return(BUFB)
+ } // Method(_CRS)
+
+ // _SRS: reads the word at offset 1 of the buffer passed in Arg0.
+ // FindSetRightBit gives the 1-based position of the lowest set bit,
+ // so after the decrement Local0 is that bit's index; it is written
+ // to PIRD as the whole byte.
+ Method(_SRS, 1) {
+ CreateWordField(ARG0, 0x01, IRQ1)
+ FindSetRightBit(IRQ1, Local0)
+ Decrement(Local0)
+ Store(Local0, PIRD)
+ } // Method(_SRS)
+ }
+ // _PRT: select the PCI interrupt routing table.  PICD holds the last
+ // argument passed to \_PIC (initially 0); a non-zero value selects
+ // PRTA, zero selects PRTP.
+ // NOTE(review): presumably _PIC(1) means APIC mode, so PRTA is the
+ // APIC table and PRTP the legacy-PIC table - confirm.
+ Method(_PRT,0) {
+ If(PICD) {Return(PRTA)}
+ Return (PRTP)
+ } // end _PRT
+
+
+ // Routing table returned by _PRT while PICD is zero.  Each entry is
+ // {address, pin, source, source index}: address 0xNNNNffff selects
+ // PCI device NN (any function), pin 0-3 is INTA-INTD, and the source
+ // is one of the interrupt link devices LNKA-LNKD.  The link assigned
+ // to a pin rotates by one for each successive device.
+ Name(PRTP, Package(){
+     // Device 0
+     Package(){0x0000ffff, 0, \_SB.PCI0.LNKA, 0}, // INTA -> LNKA
+     Package(){0x0000ffff, 1, \_SB.PCI0.LNKB, 0}, // INTB -> LNKB
+     Package(){0x0000ffff, 2, \_SB.PCI0.LNKC, 0}, // INTC -> LNKC
+     Package(){0x0000ffff, 3, \_SB.PCI0.LNKD, 0}, // INTD -> LNKD
+
+     // Device 1
+     Package(){0x0001ffff, 0, \_SB.PCI0.LNKB, 0}, // INTA -> LNKB
+     Package(){0x0001ffff, 1, \_SB.PCI0.LNKC, 0}, // INTB -> LNKC
+     Package(){0x0001ffff, 2, \_SB.PCI0.LNKD, 0}, // INTC -> LNKD
+     Package(){0x0001ffff, 3, \_SB.PCI0.LNKA, 0}, // INTD -> LNKA
+
+     // Device 2
+     Package(){0x0002ffff, 0, \_SB.PCI0.LNKC, 0}, // INTA -> LNKC
+     Package(){0x0002ffff, 1, \_SB.PCI0.LNKD, 0}, // INTB -> LNKD
+     Package(){0x0002ffff, 2, \_SB.PCI0.LNKA, 0}, // INTC -> LNKA
+     Package(){0x0002ffff, 3, \_SB.PCI0.LNKB, 0}, // INTD -> LNKB
+
+     // Device 3
+     Package(){0x0003ffff, 0, \_SB.PCI0.LNKD, 0}, // INTA -> LNKD
+     Package(){0x0003ffff, 1, \_SB.PCI0.LNKA, 0}, // INTB -> LNKA
+     Package(){0x0003ffff, 2, \_SB.PCI0.LNKB, 0}, // INTC -> LNKB
+     Package(){0x0003ffff, 3, \_SB.PCI0.LNKC, 0}, // INTD -> LNKC
+
+ })
+ // Routing table returned by _PRT while PICD is non-zero.  Each entry
+ // is {address, pin, source, source index}; with source 0 the last
+ // element is the interrupt number itself rather than a link device.
+ // Only INTA (pin 0) of devices 1-4 is listed.  The fourth entry used
+ // to repeat address 0x0003ffff, giving device 3 two conflicting
+ // routes and device 4 none.
+ Name(PRTA, Package(){
+     Package(){0x0001ffff, 0, 0, 5},  // Device 1, INTA
+
+     Package(){0x0002ffff, 0, 0, 7},  // Device 2, INTA
+
+     Package(){0x0003ffff, 0, 0, 10}, // Device 3, INTA
+
+     Package(){0x0004ffff, 0, 0, 11}, // Device 4, INTA
+
+ })
+
Device (ISA)
{
- Name (_ADR, 0x00010000) /*TODO, device id, PCI bus num, ...*/
-
+ Name (_ADR, 0x00000000) /* device id, PCI bus num, ... */
+
+ OperationRegion(PIRQ, PCI_Config, 0x60, 0x4)
+ Scope(\) {
+ Field (\_SB.PCI0.ISA.PIRQ, ByteAcc, NoLock,
Preserve) {
+ PIRA, 8,
+ PIRB, 8,
+ PIRC, 8,
+ PIRD, 8
+ }
+ }
Device (SYSR)
{
Name (_HID, EisaId ("PNP0C02"))
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi_dsdt.c
--- a/tools/firmware/acpi/acpi_dsdt.c Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/acpi_dsdt.c Fri Jun 23 15:33:25 2006 -0600
@@ -1,22 +1,22 @@
/*
*
* Intel ACPI Component Architecture
- * ASL Optimizing Compiler / AML Disassembler version 20050624 [Aug 24 2005]
+ * ASL Optimizing Compiler / AML Disassembler version 20050513 [Jun 8 2005]
* Copyright (C) 2000 - 2005 Intel Corporation
* Supports ACPI Specification Revision 3.0
*
- * Compilation of "acpi_dsdt.asl" - Thu May 4 17:42:00 2006
+ * Compilation of "acpi_dsdt.asl" - Mon Jun 12 22:33:41 2006
*
* C source code output
*
*/
unsigned char AmlCode[] =
{
- 0x44,0x53,0x44,0x54,0x7C,0x04,0x00,0x00, /* 00000000 "DSDT|..." */
- 0x01,0x72,0x49,0x4E,0x54,0x45,0x4C,0x20, /* 00000008 ".rINTEL " */
- 0x58,0x45,0x4E,0x20,0x20,0x20,0x20,0x20, /* 00000010 "XEN " */
- 0x02,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */
- 0x24,0x06,0x05,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "$.. .PMB" */
+ 0x44,0x53,0x44,0x54,0xC3,0x08,0x00,0x00, /* 00000000 "DSDT...." */
+ 0x01,0x0C,0x49,0x4E,0x54,0x45,0x4C,0x00, /* 00000008 "..INTEL." */
+ 0x69,0x6E,0x74,0x2D,0x78,0x65,0x6E,0x00, /* 00000010 "int-xen." */
+ 0xD6,0x07,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */
+ 0x13,0x05,0x05,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "... .PMB" */
0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C, /* 00000028 "S....PML" */
0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31, /* 00000030 "N...IOB1" */
0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08, /* 00000038 "..IOL1.." */
@@ -32,129 +32,266 @@ unsigned char AmlCode[] =
0x0B,0x43,0x50,0x55,0x33,0x03,0x00,0x00, /* 00000088 ".CPU3..." */
0x00,0x00,0x00,0x08,0x5F,0x53,0x35,0x5F, /* 00000090 "...._S5_" */
0x12,0x08,0x04,0x0A,0x07,0x0A,0x07,0x00, /* 00000098 "........" */
- 0x00,0x10,0x4A,0x3D,0x5F,0x53,0x42,0x5F, /* 000000A0 "..J=_SB_" */
- 0x5B,0x82,0x42,0x3D,0x50,0x43,0x49,0x30, /* 000000A8 "[.B=PCI0" */
- 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 000000B0 "._HID.A." */
- 0x0A,0x03,0x08,0x5F,0x55,0x49,0x44,0x00, /* 000000B8 "..._UID." */
- 0x08,0x5F,0x41,0x44,0x52,0x00,0x08,0x5F, /* 000000C0 "._ADR.._" */
- 0x42,0x42,0x4E,0x00,0x14,0x4A,0x06,0x5F, /* 000000C8 "BBN..J._" */
- 0x43,0x52,0x53,0x00,0x08,0x50,0x52,0x54, /* 000000D0 "CRS..PRT" */
- 0x30,0x11,0x48,0x05,0x0A,0x54,0x88,0x0D, /* 000000D8 "0.H..T.." */
- 0x00,0x02,0x0F,0x00,0x00,0x00,0x00,0x00, /* 000000E0 "........" */
- 0xFF,0x00,0x00,0x00,0x00,0x01,0x47,0x01, /* 000000E8 "......G." */
- 0xF8,0x0C,0xF8,0x0C,0x01,0x08,0x88,0x0D, /* 000000F0 "........" */
- 0x00,0x01,0x0C,0x03,0x00,0x00,0x00,0x00, /* 000000F8 "........" */
- 0xF7,0x0C,0x00,0x00,0xF8,0x0C,0x88,0x0D, /* 00000100 "........" */
- 0x00,0x01,0x0C,0x03,0x00,0x00,0x00,0x0D, /* 00000108 "........" */
- 0xFF,0x0F,0x00,0x00,0x00,0x03,0x87,0x17, /* 00000110 "........" */
- 0x00,0x00,0x0C,0x02,0x00,0x00,0x00,0x00, /* 00000118 "........" */
- 0x00,0x00,0x0A,0x00,0xFF,0xFF,0x0F,0x00, /* 00000120 "........" */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00, /* 00000128 "........" */
- 0x79,0x00,0xA4,0x50,0x52,0x54,0x30,0x08, /* 00000130 "y..PRT0." */
- 0x41,0x49,0x52,0x30,0x12,0x4F,0x04,0x06, /* 00000138 "AIR0.O.." */
- 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x1F,0x00, /* 00000140 "........" */
- 0x0A,0x02,0x00,0x0A,0x17,0x12,0x0C,0x04, /* 00000148 "........" */
- 0x0C,0xFF,0xFF,0x1F,0x00,0x0A,0x03,0x00, /* 00000150 "........" */
- 0x0A,0x13,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000158 "........" */
- 0x1D,0x00,0x01,0x00,0x0A,0x13,0x12,0x0B, /* 00000160 "........" */
- 0x04,0x0C,0xFF,0xFF,0x1D,0x00,0x00,0x00, /* 00000168 "........" */
- 0x0A,0x10,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000170 "........" */
- 0x1D,0x00,0x0A,0x02,0x00,0x0A,0x12,0x12, /* 00000178 "........" */
- 0x0C,0x04,0x0C,0xFF,0xFF,0x1D,0x00,0x0A, /* 00000180 "........" */
- 0x03,0x00,0x0A,0x17,0x14,0x0B,0x5F,0x50, /* 00000188 "......_P" */
- 0x52,0x54,0x00,0xA4,0x41,0x49,0x52,0x30, /* 00000190 "RT..AIR0" */
- 0x5B,0x82,0x42,0x2E,0x49,0x53,0x41,0x5F, /* 00000198 "[.B.ISA_" */
- 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 000001A0 "._ADR..." */
- 0x01,0x00,0x5B,0x82,0x46,0x0B,0x53,0x59, /* 000001A8 "..[.F.SY" */
- 0x53,0x52,0x08,0x5F,0x48,0x49,0x44,0x0C, /* 000001B0 "SR._HID." */
- 0x41,0xD0,0x0C,0x02,0x08,0x5F,0x55,0x49, /* 000001B8 "A...._UI" */
- 0x44,0x01,0x08,0x43,0x52,0x53,0x5F,0x11, /* 000001C0 "D..CRS_." */
- 0x4E,0x08,0x0A,0x8A,0x47,0x01,0x10,0x00, /* 000001C8 "N...G..." */
- 0x10,0x00,0x00,0x10,0x47,0x01,0x22,0x00, /* 000001D0 "....G."." */
- 0x22,0x00,0x00,0x0C,0x47,0x01,0x30,0x00, /* 000001D8 ""...G.0." */
- 0x30,0x00,0x00,0x10,0x47,0x01,0x44,0x00, /* 000001E0 "0...G.D." */
- 0x44,0x00,0x00,0x1C,0x47,0x01,0x62,0x00, /* 000001E8 "D...G.b." */
- 0x62,0x00,0x00,0x02,0x47,0x01,0x65,0x00, /* 000001F0 "b...G.e." */
- 0x65,0x00,0x00,0x0B,0x47,0x01,0x72,0x00, /* 000001F8 "e...G.r." */
- 0x72,0x00,0x00,0x0E,0x47,0x01,0x80,0x00, /* 00000200 "r...G..." */
- 0x80,0x00,0x00,0x01,0x47,0x01,0x84,0x00, /* 00000208 "....G..." */
- 0x84,0x00,0x00,0x03,0x47,0x01,0x88,0x00, /* 00000210 "....G..." */
- 0x88,0x00,0x00,0x01,0x47,0x01,0x8C,0x00, /* 00000218 "....G..." */
- 0x8C,0x00,0x00,0x03,0x47,0x01,0x90,0x00, /* 00000220 "....G..." */
- 0x90,0x00,0x00,0x10,0x47,0x01,0xA2,0x00, /* 00000228 "....G..." */
- 0xA2,0x00,0x00,0x1C,0x47,0x01,0xE0,0x00, /* 00000230 "....G..." */
- 0xE0,0x00,0x00,0x10,0x47,0x01,0xA0,0x08, /* 00000238 "....G..." */
- 0xA0,0x08,0x00,0x04,0x47,0x01,0xC0,0x0C, /* 00000240 "....G..." */
- 0xC0,0x0C,0x00,0x10,0x47,0x01,0xD0,0x04, /* 00000248 "....G..." */
- 0xD0,0x04,0x00,0x02,0x79,0x00,0x14,0x0B, /* 00000250 "....y..." */
- 0x5F,0x43,0x52,0x53,0x00,0xA4,0x43,0x52, /* 00000258 "_CRS..CR" */
- 0x53,0x5F,0x5B,0x82,0x2B,0x50,0x49,0x43, /* 00000260 "S_[.+PIC" */
- 0x5F,0x08,0x5F,0x48,0x49,0x44,0x0B,0x41, /* 00000268 "_._HID.A" */
- 0xD0,0x08,0x5F,0x43,0x52,0x53,0x11,0x18, /* 00000270 ".._CRS.." */
- 0x0A,0x15,0x47,0x01,0x20,0x00,0x20,0x00, /* 00000278 "..G. . ." */
- 0x01,0x02,0x47,0x01,0xA0,0x00,0xA0,0x00, /* 00000280 "..G....." */
- 0x01,0x02,0x22,0x04,0x00,0x79,0x00,0x5B, /* 00000288 ".."..y.[" */
- 0x82,0x47,0x05,0x44,0x4D,0x41,0x30,0x08, /* 00000290 ".G.DMA0." */
- 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x02, /* 00000298 "_HID.A.." */
- 0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x41, /* 000002A0 ".._CRS.A" */
- 0x04,0x0A,0x3D,0x2A,0x10,0x04,0x47,0x01, /* 000002A8 "..=*..G." */
- 0x00,0x00,0x00,0x00,0x00,0x10,0x47,0x01, /* 000002B0 "......G." */
- 0x81,0x00,0x81,0x00,0x00,0x03,0x47,0x01, /* 000002B8 "......G." */
- 0x87,0x00,0x87,0x00,0x00,0x01,0x47,0x01, /* 000002C0 "......G." */
- 0x89,0x00,0x89,0x00,0x00,0x03,0x47,0x01, /* 000002C8 "......G." */
- 0x8F,0x00,0x8F,0x00,0x00,0x01,0x47,0x01, /* 000002D0 "......G." */
- 0xC0,0x00,0xC0,0x00,0x00,0x20,0x47,0x01, /* 000002D8 "..... G." */
- 0x80,0x04,0x80,0x04,0x00,0x10,0x79,0x00, /* 000002E0 "......y." */
- 0x5B,0x82,0x25,0x54,0x4D,0x52,0x5F,0x08, /* 000002E8 "[.%TMR_." */
- 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x01, /* 000002F0 "_HID.A.." */
- 0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x10, /* 000002F8 ".._CRS.." */
- 0x0A,0x0D,0x47,0x01,0x40,0x00,0x40,0x00, /* 00000300 "..G.@.@." */
- 0x00,0x04,0x22,0x01,0x00,0x79,0x00,0x5B, /* 00000308 ".."..y.[" */
- 0x82,0x25,0x52,0x54,0x43,0x5F,0x08,0x5F, /* 00000310 ".%RTC_._" */
- 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0B,0x00, /* 00000318 "HID.A..." */
- 0x08,0x5F,0x43,0x52,0x53,0x11,0x10,0x0A, /* 00000320 "._CRS..." */
- 0x0D,0x47,0x01,0x70,0x00,0x70,0x00,0x00, /* 00000328 ".G.p.p.." */
- 0x02,0x22,0x00,0x01,0x79,0x00,0x5B,0x82, /* 00000330 "."..y.[." */
- 0x22,0x53,0x50,0x4B,0x52,0x08,0x5F,0x48, /* 00000338 ""SPKR._H" */
- 0x49,0x44,0x0C,0x41,0xD0,0x08,0x00,0x08, /* 00000340 "ID.A...." */
- 0x5F,0x43,0x52,0x53,0x11,0x0D,0x0A,0x0A, /* 00000348 "_CRS...." */
- 0x47,0x01,0x61,0x00,0x61,0x00,0x00,0x01, /* 00000350 "G.a.a..." */
- 0x79,0x00,0x5B,0x82,0x31,0x50,0x53,0x32, /* 00000358 "y.[.1PS2" */
- 0x4D,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 00000360 "M._HID.A" */
- 0xD0,0x0F,0x13,0x08,0x5F,0x43,0x49,0x44, /* 00000368 "...._CID" */
- 0x0C,0x41,0xD0,0x0F,0x13,0x14,0x09,0x5F, /* 00000370 ".A....._" */
- 0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08, /* 00000378 "STA....." */
- 0x5F,0x43,0x52,0x53,0x11,0x08,0x0A,0x05, /* 00000380 "_CRS...." */
- 0x22,0x00,0x10,0x79,0x00,0x5B,0x82,0x42, /* 00000388 ""..y.[.B" */
- 0x04,0x50,0x53,0x32,0x4B,0x08,0x5F,0x48, /* 00000390 ".PS2K._H" */
- 0x49,0x44,0x0C,0x41,0xD0,0x03,0x03,0x08, /* 00000398 "ID.A...." */
- 0x5F,0x43,0x49,0x44,0x0C,0x41,0xD0,0x03, /* 000003A0 "_CID.A.." */
- 0x0B,0x14,0x09,0x5F,0x53,0x54,0x41,0x00, /* 000003A8 "..._STA." */
- 0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53, /* 000003B0 "...._CRS" */
- 0x11,0x18,0x0A,0x15,0x47,0x01,0x60,0x00, /* 000003B8 "....G.`." */
- 0x60,0x00,0x00,0x01,0x47,0x01,0x64,0x00, /* 000003C0 "`...G.d." */
- 0x64,0x00,0x00,0x01,0x22,0x02,0x00,0x79, /* 000003C8 "d..."..y" */
- 0x00,0x5B,0x82,0x3A,0x46,0x44,0x43,0x30, /* 000003D0 ".[.:FDC0" */
- 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 000003D8 "._HID.A." */
- 0x07,0x00,0x14,0x09,0x5F,0x53,0x54,0x41, /* 000003E0 "...._STA" */
- 0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52, /* 000003E8 "....._CR" */
- 0x53,0x11,0x1B,0x0A,0x18,0x47,0x01,0xF0, /* 000003F0 "S....G.." */
- 0x03,0xF0,0x03,0x01,0x06,0x47,0x01,0xF7, /* 000003F8 ".....G.." */
- 0x03,0xF7,0x03,0x01,0x01,0x22,0x40,0x00, /* 00000400 "....."@." */
- 0x2A,0x04,0x00,0x79,0x00,0x5B,0x82,0x35, /* 00000408 "*..y.[.5" */
- 0x55,0x41,0x52,0x31,0x08,0x5F,0x48,0x49, /* 00000410 "UAR1._HI" */
- 0x44,0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F, /* 00000418 "D.A...._" */
- 0x55,0x49,0x44,0x01,0x14,0x09,0x5F,0x53, /* 00000420 "UID..._S" */
- 0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F, /* 00000428 "TA....._" */
- 0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47, /* 00000430 "CRS....G" */
- 0x01,0xF8,0x03,0xF8,0x03,0x01,0x08,0x22, /* 00000438 "......."" */
- 0x10,0x00,0x79,0x00,0x5B,0x82,0x36,0x55, /* 00000440 "..y.[.6U" */
- 0x41,0x52,0x32,0x08,0x5F,0x48,0x49,0x44, /* 00000448 "AR2._HID" */
- 0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55, /* 00000450 ".A...._U" */
- 0x49,0x44,0x0A,0x02,0x14,0x09,0x5F,0x53, /* 00000458 "ID...._S" */
- 0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F, /* 00000460 "TA....._" */
- 0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47, /* 00000468 "CRS....G" */
- 0x01,0xF8,0x02,0xF8,0x02,0x01,0x08,0x22, /* 00000470 "......."" */
- 0x08,0x00,0x79,0x00,
+ 0x00,0x08,0x50,0x49,0x43,0x44,0x00,0x14, /* 000000A0 "..PICD.." */
+ 0x0C,0x5F,0x50,0x49,0x43,0x01,0x70,0x68, /* 000000A8 "._PIC.ph" */
+ 0x50,0x49,0x43,0x44,0x10,0x4E,0x80,0x5F, /* 000000B0 "PICD.N._" */
+ 0x53,0x42,0x5F,0x5B,0x82,0x46,0x80,0x50, /* 000000B8 "SB_[.F.P" */
+ 0x43,0x49,0x30,0x08,0x5F,0x48,0x49,0x44, /* 000000C0 "CI0._HID" */
+ 0x0C,0x41,0xD0,0x0A,0x03,0x08,0x5F,0x55, /* 000000C8 ".A...._U" */
+ 0x49,0x44,0x00,0x08,0x5F,0x41,0x44,0x52, /* 000000D0 "ID.._ADR" */
+ 0x00,0x08,0x5F,0x42,0x42,0x4E,0x00,0x5B, /* 000000D8 ".._BBN.[" */
+ 0x80,0x50,0x49,0x52,0x50,0x02,0x0A,0x3C, /* 000000E0 ".PIRP..<" */
+ 0x0A,0x10,0x5B,0x81,0x24,0x50,0x49,0x52, /* 000000E8 "..[.$PIR" */
+ 0x50,0x01,0x49,0x52,0x51,0x33,0x03,0x49, /* 000000F0 "P.IRQ3.I" */
+ 0x52,0x51,0x35,0x05,0x49,0x52,0x51,0x37, /* 000000F8 "RQ5.IRQ7" */
+ 0x07,0x49,0x52,0x51,0x39,0x09,0x49,0x52, /* 00000100 ".IRQ9.IR" */
+ 0x51,0x41,0x0A,0x49,0x52,0x51,0x42,0x0B, /* 00000108 "QA.IRQB." */
+ 0x14,0x48,0x0D,0x5F,0x43,0x52,0x53,0x00, /* 00000110 ".H._CRS." */
+ 0x08,0x50,0x52,0x54,0x30,0x11,0x46,0x0C, /* 00000118 ".PRT0.F." */
+ 0x0A,0xC2,0x88,0x0D,0x00,0x02,0x0F,0x00, /* 00000120 "........" */
+ 0x00,0x00,0x00,0x00,0xFF,0x00,0x00,0x00, /* 00000128 "........" */
+ 0x00,0x01,0x47,0x01,0xF8,0x0C,0xF8,0x0C, /* 00000130 "..G....." */
+ 0x01,0x08,0x88,0x0D,0x00,0x01,0x0C,0x03, /* 00000138 "........" */
+ 0x00,0x00,0x00,0x00,0xF7,0x0C,0x00,0x00, /* 00000140 "........" */
+ 0xF8,0x0C,0x88,0x0D,0x00,0x01,0x0C,0x03, /* 00000148 "........" */
+ 0x00,0x00,0x00,0x0D,0xFF,0x0F,0x00,0x00, /* 00000150 "........" */
+ 0x00,0x03,0x88,0x0D,0x00,0x01,0x0D,0x03, /* 00000158 "........" */
+ 0x00,0x00,0x00,0xC0,0x1F,0xC0,0x00,0x00, /* 00000160 "........" */
+ 0x20,0x00,0x88,0x0D,0x00,0x01,0x0D,0x03, /* 00000168 " ......." */
+ 0x00,0x00,0x20,0xC0,0x3F,0xC0,0x00,0x00, /* 00000170 ".. .?..." */
+ 0x10,0x00,0x87,0x17,0x00,0x00,0x0C,0x02, /* 00000178 "........" */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x0C,0x00, /* 00000180 "........" */
+ 0xFF,0xFF,0x0F,0x00,0x00,0x00,0x00,0x00, /* 00000188 "........" */
+ 0x00,0x00,0x03,0x00,0x87,0x17,0x00,0x00, /* 00000190 "........" */
+ 0x0D,0x03,0x00,0x00,0x00,0x00,0x00,0x00, /* 00000198 "........" */
+ 0x00,0xF0,0xFF,0xFF,0xFF,0xF1,0x00,0x00, /* 000001A0 "........" */
+ 0x00,0x00,0x00,0x00,0x00,0x02,0x87,0x17, /* 000001A8 "........" */
+ 0x00,0x00,0x0D,0x03,0x00,0x00,0x00,0x00, /* 000001B0 "........" */
+ 0x00,0x00,0x00,0xF2,0xFF,0x0F,0x00,0xF2, /* 000001B8 "........" */
+ 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00, /* 000001C0 "........" */
+ 0x87,0x17,0x00,0x00,0x0D,0x03,0x00,0x00, /* 000001C8 "........" */
+ 0x00,0x00,0x00,0x10,0x00,0xF2,0x1F,0x10, /* 000001D0 "........" */
+ 0x00,0xF2,0x00,0x00,0x00,0x00,0x20,0x00, /* 000001D8 "...... ." */
+ 0x00,0x00,0x79,0x00,0xA4,0x50,0x52,0x54, /* 000001E0 "..y..PRT" */
+ 0x30,0x08,0x42,0x55,0x46,0x41,0x11,0x09, /* 000001E8 "0.BUFA.." */
+ 0x0A,0x06,0x23,0xF8,0xDC,0x18,0x79,0x00, /* 000001F0 "..#...y." */
+ 0x08,0x42,0x55,0x46,0x42,0x11,0x09,0x0A, /* 000001F8 ".BUFB..." */
+ 0x06,0x23,0x00,0x00,0x18,0x79,0x00,0x8B, /* 00000200 ".#...y.." */
+ 0x42,0x55,0x46,0x42,0x01,0x49,0x52,0x51, /* 00000208 "BUFB.IRQ" */
+ 0x56,0x08,0x42,0x55,0x46,0x43,0x11,0x07, /* 00000210 "V.BUFC.." */
+ 0x0A,0x04,0x05,0x07,0x0A,0x0B,0x8C,0x42, /* 00000218 ".......B" */
+ 0x55,0x46,0x43,0x01,0x50,0x49,0x51,0x41, /* 00000220 "UFC.PIQA" */
+ 0x8C,0x42,0x55,0x46,0x43,0x01,0x50,0x49, /* 00000228 ".BUFC.PI" */
+ 0x51,0x42,0x8C,0x42,0x55,0x46,0x43,0x01, /* 00000230 "QB.BUFC." */
+ 0x50,0x49,0x51,0x43,0x8C,0x42,0x55,0x46, /* 00000238 "PIQC.BUF" */
+ 0x43,0x01,0x50,0x49,0x51,0x44,0x5B,0x82, /* 00000240 "C.PIQD[." */
+ 0x48,0x08,0x4C,0x4E,0x4B,0x41,0x08,0x5F, /* 00000248 "H.LNKA._" */
+ 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F, /* 00000250 "HID.A..." */
+ 0x08,0x5F,0x55,0x49,0x44,0x01,0x14,0x1C, /* 00000258 "._UID..." */
+ 0x5F,0x53,0x54,0x41,0x00,0x7B,0x50,0x49, /* 00000260 "_STA.{PI" */
+ 0x52,0x41,0x0A,0x80,0x60,0xA0,0x08,0x93, /* 00000268 "RA..`..." */
+ 0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1,0x04, /* 00000270 "`......." */
+ 0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50,0x52, /* 00000278 "....._PR" */
+ 0x53,0x00,0xA4,0x42,0x55,0x46,0x41,0x14, /* 00000280 "S..BUFA." */
+ 0x11,0x5F,0x44,0x49,0x53,0x00,0x7D,0x50, /* 00000288 "._DIS.}P" */
+ 0x49,0x52,0x41,0x0A,0x80,0x50,0x49,0x52, /* 00000290 "IRA..PIR" */
+ 0x41,0x14,0x1A,0x5F,0x43,0x52,0x53,0x00, /* 00000298 "A.._CRS." */
+ 0x7B,0x50,0x49,0x52,0x42,0x0A,0x0F,0x60, /* 000002A0 "{PIRB..`" */
+ 0x79,0x01,0x60,0x49,0x52,0x51,0x56,0xA4, /* 000002A8 "y.`IRQV." */
+ 0x42,0x55,0x46,0x42,0x14,0x1B,0x5F,0x53, /* 000002B0 "BUFB.._S" */
+ 0x52,0x53,0x01,0x8B,0x68,0x01,0x49,0x52, /* 000002B8 "RS..h.IR" */
+ 0x51,0x31,0x82,0x49,0x52,0x51,0x31,0x60, /* 000002C0 "Q1.IRQ1`" */
+ 0x76,0x60,0x70,0x60,0x50,0x49,0x52,0x41, /* 000002C8 "v`p`PIRA" */
+ 0x5B,0x82,0x49,0x08,0x4C,0x4E,0x4B,0x42, /* 000002D0 "[.I.LNKB" */
+ 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 000002D8 "._HID.A." */
+ 0x0C,0x0F,0x08,0x5F,0x55,0x49,0x44,0x0A, /* 000002E0 "..._UID." */
+ 0x02,0x14,0x1C,0x5F,0x53,0x54,0x41,0x00, /* 000002E8 "..._STA." */
+ 0x7B,0x50,0x49,0x52,0x42,0x0A,0x80,0x60, /* 000002F0 "{PIRB..`" */
+ 0xA0,0x08,0x93,0x60,0x0A,0x80,0xA4,0x0A, /* 000002F8 "...`...." */
+ 0x09,0xA1,0x04,0xA4,0x0A,0x0B,0x14,0x0B, /* 00000300 "........" */
+ 0x5F,0x50,0x52,0x53,0x00,0xA4,0x42,0x55, /* 00000308 "_PRS..BU" */
+ 0x46,0x41,0x14,0x11,0x5F,0x44,0x49,0x53, /* 00000310 "FA.._DIS" */
+ 0x00,0x7D,0x50,0x49,0x52,0x42,0x0A,0x80, /* 00000318 ".}PIRB.." */
+ 0x50,0x49,0x52,0x42,0x14,0x1A,0x5F,0x43, /* 00000320 "PIRB.._C" */
+ 0x52,0x53,0x00,0x7B,0x50,0x49,0x52,0x42, /* 00000328 "RS.{PIRB" */
+ 0x0A,0x0F,0x60,0x79,0x01,0x60,0x49,0x52, /* 00000330 "..`y.`IR" */
+ 0x51,0x56,0xA4,0x42,0x55,0x46,0x42,0x14, /* 00000338 "QV.BUFB." */
+ 0x1B,0x5F,0x53,0x52,0x53,0x01,0x8B,0x68, /* 00000340 "._SRS..h" */
+ 0x01,0x49,0x52,0x51,0x31,0x82,0x49,0x52, /* 00000348 ".IRQ1.IR" */
+ 0x51,0x31,0x60,0x76,0x60,0x70,0x60,0x50, /* 00000350 "Q1`v`p`P" */
+ 0x49,0x52,0x42,0x5B,0x82,0x49,0x08,0x4C, /* 00000358 "IRB[.I.L" */
+ 0x4E,0x4B,0x43,0x08,0x5F,0x48,0x49,0x44, /* 00000360 "NKC._HID" */
+ 0x0C,0x41,0xD0,0x0C,0x0F,0x08,0x5F,0x55, /* 00000368 ".A...._U" */
+ 0x49,0x44,0x0A,0x03,0x14,0x1C,0x5F,0x53, /* 00000370 "ID...._S" */
+ 0x54,0x41,0x00,0x7B,0x50,0x49,0x52,0x43, /* 00000378 "TA.{PIRC" */
+ 0x0A,0x80,0x60,0xA0,0x08,0x93,0x60,0x0A, /* 00000380 "..`...`." */
+ 0x80,0xA4,0x0A,0x09,0xA1,0x04,0xA4,0x0A, /* 00000388 "........" */
+ 0x0B,0x14,0x0B,0x5F,0x50,0x52,0x53,0x00, /* 00000390 "..._PRS." */
+ 0xA4,0x42,0x55,0x46,0x41,0x14,0x11,0x5F, /* 00000398 ".BUFA.._" */
+ 0x44,0x49,0x53,0x00,0x7D,0x50,0x49,0x52, /* 000003A0 "DIS.}PIR" */
+ 0x43,0x0A,0x80,0x50,0x49,0x52,0x43,0x14, /* 000003A8 "C..PIRC." */
+ 0x1A,0x5F,0x43,0x52,0x53,0x00,0x7B,0x50, /* 000003B0 "._CRS.{P" */
+ 0x49,0x52,0x43,0x0A,0x0F,0x60,0x79,0x01, /* 000003B8 "IRC..`y." */
+ 0x60,0x49,0x52,0x51,0x56,0xA4,0x42,0x55, /* 000003C0 "`IRQV.BU" */
+ 0x46,0x42,0x14,0x1B,0x5F,0x53,0x52,0x53, /* 000003C8 "FB.._SRS" */
+ 0x01,0x8B,0x68,0x01,0x49,0x52,0x51,0x31, /* 000003D0 "..h.IRQ1" */
+ 0x82,0x49,0x52,0x51,0x31,0x60,0x76,0x60, /* 000003D8 ".IRQ1`v`" */
+ 0x70,0x60,0x50,0x49,0x52,0x43,0x5B,0x82, /* 000003E0 "p`PIRC[." */
+ 0x49,0x08,0x4C,0x4E,0x4B,0x44,0x08,0x5F, /* 000003E8 "I.LNKD._" */
+ 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F, /* 000003F0 "HID.A..." */
+ 0x08,0x5F,0x55,0x49,0x44,0x0A,0x04,0x14, /* 000003F8 "._UID..." */
+ 0x1C,0x5F,0x53,0x54,0x41,0x00,0x7B,0x50, /* 00000400 "._STA.{P" */
+ 0x49,0x52,0x44,0x0A,0x80,0x60,0xA0,0x08, /* 00000408 "IRD..`.." */
+ 0x93,0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1, /* 00000410 ".`......" */
+ 0x04,0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50, /* 00000418 "......_P" */
+ 0x52,0x53,0x00,0xA4,0x42,0x55,0x46,0x41, /* 00000420 "RS..BUFA" */
+ 0x14,0x11,0x5F,0x44,0x49,0x53,0x00,0x7D, /* 00000428 ".._DIS.}" */
+ 0x50,0x49,0x52,0x44,0x0A,0x80,0x50,0x49, /* 00000430 "PIRD..PI" */
+ 0x52,0x44,0x14,0x1A,0x5F,0x43,0x52,0x53, /* 00000438 "RD.._CRS" */
+ 0x00,0x7B,0x50,0x49,0x52,0x44,0x0A,0x0F, /* 00000440 ".{PIRD.." */
+ 0x60,0x79,0x01,0x60,0x49,0x52,0x51,0x56, /* 00000448 "`y.`IRQV" */
+ 0xA4,0x42,0x55,0x46,0x42,0x14,0x1B,0x5F, /* 00000450 ".BUFB.._" */
+ 0x53,0x52,0x53,0x01,0x8B,0x68,0x01,0x49, /* 00000458 "SRS..h.I" */
+ 0x52,0x51,0x31,0x82,0x49,0x52,0x51,0x31, /* 00000460 "RQ1.IRQ1" */
+ 0x60,0x76,0x60,0x70,0x60,0x50,0x49,0x52, /* 00000468 "`v`p`PIR" */
+ 0x44,0x14,0x16,0x5F,0x50,0x52,0x54,0x00, /* 00000470 "D.._PRT." */
+ 0xA0,0x0A,0x50,0x49,0x43,0x44,0xA4,0x50, /* 00000478 "..PICD.P" */
+ 0x52,0x54,0x41,0xA4,0x50,0x52,0x54,0x50, /* 00000480 "RTA.PRTP" */
+ 0x08,0x50,0x52,0x54,0x50,0x12,0x43,0x0E, /* 00000488 ".PRTP.C." */
+ 0x10,0x12,0x0B,0x04,0x0B,0xFF,0xFF,0x00, /* 00000490 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0B,0x04, /* 00000498 "LNKA...." */
+ 0x0B,0xFF,0xFF,0x01,0x4C,0x4E,0x4B,0x42, /* 000004A0 "....LNKB" */
+ 0x00,0x12,0x0C,0x04,0x0B,0xFF,0xFF,0x0A, /* 000004A8 "........" */
+ 0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0C, /* 000004B0 ".LNKC..." */
+ 0x04,0x0B,0xFF,0xFF,0x0A,0x03,0x4C,0x4E, /* 000004B8 "......LN" */
+ 0x4B,0x44,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 000004C0 "KD......" */
+ 0xFF,0x01,0x00,0x00,0x4C,0x4E,0x4B,0x42, /* 000004C8 "....LNKB" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x01, /* 000004D0 "........" */
+ 0x00,0x01,0x4C,0x4E,0x4B,0x43,0x00,0x12, /* 000004D8 "..LNKC.." */
+ 0x0E,0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A, /* 000004E0 "........" */
+ 0x02,0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E, /* 000004E8 ".LNKD..." */
+ 0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A,0x03, /* 000004F0 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0D,0x04, /* 000004F8 "LNKA...." */
+ 0x0C,0xFF,0xFF,0x02,0x00,0x00,0x4C,0x4E, /* 00000500 "......LN" */
+ 0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000508 "KC......" */
+ 0xFF,0x02,0x00,0x01,0x4C,0x4E,0x4B,0x44, /* 00000510 "....LNKD" */
+ 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x02, /* 00000518 "........" */
+ 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x41,0x00, /* 00000520 "...LNKA." */
+ 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x02,0x00, /* 00000528 "........" */
+ 0x0A,0x03,0x4C,0x4E,0x4B,0x42,0x00,0x12, /* 00000530 "..LNKB.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x03,0x00,0x00, /* 00000538 "........" */
+ 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04, /* 00000540 "LNKD...." */
+ 0x0C,0xFF,0xFF,0x03,0x00,0x01,0x4C,0x4E, /* 00000548 "......LN" */
+ 0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000550 "KA......" */
+ 0xFF,0x03,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 00000558 ".....LNK" */
+ 0x42,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000560 "B......." */
+ 0x03,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x43, /* 00000568 "....LNKC" */
+ 0x00,0x08,0x50,0x52,0x54,0x41,0x12,0x32, /* 00000570 "..PRTA.2" */
+ 0x04,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x01, /* 00000578 "........" */
+ 0x00,0x00,0x00,0x0A,0x05,0x12,0x0B,0x04, /* 00000580 "........" */
+ 0x0C,0xFF,0xFF,0x02,0x00,0x00,0x00,0x0A, /* 00000588 "........" */
+ 0x07,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x03, /* 00000590 "........" */
+ 0x00,0x00,0x00,0x0A,0x0A,0x12,0x0B,0x04, /* 00000598 "........" */
+ 0x0C,0xFF,0xFF,0x03,0x00,0x00,0x00,0x0A, /* 000005A0 "........" */
+ 0x0B,0x5B,0x82,0x48,0x31,0x49,0x53,0x41, /* 000005A8 ".[.H1ISA" */
+ 0x5F,0x08,0x5F,0x41,0x44,0x52,0x00,0x5B, /* 000005B0 "_._ADR.[" */
+ 0x80,0x50,0x49,0x52,0x51,0x02,0x0A,0x60, /* 000005B8 ".PIRQ..`" */
+ 0x0A,0x04,0x10,0x2E,0x5C,0x00,0x5B,0x81, /* 000005C0 "....\.[." */
+ 0x29,0x5C,0x2F,0x04,0x5F,0x53,0x42,0x5F, /* 000005C8 ")\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x49,0x53,0x41,0x5F, /* 000005D0 "PCI0ISA_" */
+ 0x50,0x49,0x52,0x51,0x01,0x50,0x49,0x52, /* 000005D8 "PIRQ.PIR" */
+ 0x41,0x08,0x50,0x49,0x52,0x42,0x08,0x50, /* 000005E0 "A.PIRB.P" */
+ 0x49,0x52,0x43,0x08,0x50,0x49,0x52,0x44, /* 000005E8 "IRC.PIRD" */
+ 0x08,0x5B,0x82,0x46,0x0B,0x53,0x59,0x53, /* 000005F0 ".[.F.SYS" */
+ 0x52,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 000005F8 "R._HID.A" */
+ 0xD0,0x0C,0x02,0x08,0x5F,0x55,0x49,0x44, /* 00000600 "...._UID" */
+ 0x01,0x08,0x43,0x52,0x53,0x5F,0x11,0x4E, /* 00000608 "..CRS_.N" */
+ 0x08,0x0A,0x8A,0x47,0x01,0x10,0x00,0x10, /* 00000610 "...G...." */
+ 0x00,0x00,0x10,0x47,0x01,0x22,0x00,0x22, /* 00000618 "...G."."" */
+ 0x00,0x00,0x0C,0x47,0x01,0x30,0x00,0x30, /* 00000620 "...G.0.0" */
+ 0x00,0x00,0x10,0x47,0x01,0x44,0x00,0x44, /* 00000628 "...G.D.D" */
+ 0x00,0x00,0x1C,0x47,0x01,0x62,0x00,0x62, /* 00000630 "...G.b.b" */
+ 0x00,0x00,0x02,0x47,0x01,0x65,0x00,0x65, /* 00000638 "...G.e.e" */
+ 0x00,0x00,0x0B,0x47,0x01,0x72,0x00,0x72, /* 00000640 "...G.r.r" */
+ 0x00,0x00,0x0E,0x47,0x01,0x80,0x00,0x80, /* 00000648 "...G...." */
+ 0x00,0x00,0x01,0x47,0x01,0x84,0x00,0x84, /* 00000650 "...G...." */
+ 0x00,0x00,0x03,0x47,0x01,0x88,0x00,0x88, /* 00000658 "...G...." */
+ 0x00,0x00,0x01,0x47,0x01,0x8C,0x00,0x8C, /* 00000660 "...G...." */
+ 0x00,0x00,0x03,0x47,0x01,0x90,0x00,0x90, /* 00000668 "...G...." */
+ 0x00,0x00,0x10,0x47,0x01,0xA2,0x00,0xA2, /* 00000670 "...G...." */
+ 0x00,0x00,0x1C,0x47,0x01,0xE0,0x00,0xE0, /* 00000678 "...G...." */
+ 0x00,0x00,0x10,0x47,0x01,0xA0,0x08,0xA0, /* 00000680 "...G...." */
+ 0x08,0x00,0x04,0x47,0x01,0xC0,0x0C,0xC0, /* 00000688 "...G...." */
+ 0x0C,0x00,0x10,0x47,0x01,0xD0,0x04,0xD0, /* 00000690 "...G...." */
+ 0x04,0x00,0x02,0x79,0x00,0x14,0x0B,0x5F, /* 00000698 "...y..._" */
+ 0x43,0x52,0x53,0x00,0xA4,0x43,0x52,0x53, /* 000006A0 "CRS..CRS" */
+ 0x5F,0x5B,0x82,0x2B,0x50,0x49,0x43,0x5F, /* 000006A8 "_[.+PIC_" */
+ 0x08,0x5F,0x48,0x49,0x44,0x0B,0x41,0xD0, /* 000006B0 "._HID.A." */
+ 0x08,0x5F,0x43,0x52,0x53,0x11,0x18,0x0A, /* 000006B8 "._CRS..." */
+ 0x15,0x47,0x01,0x20,0x00,0x20,0x00,0x01, /* 000006C0 ".G. . .." */
+ 0x02,0x47,0x01,0xA0,0x00,0xA0,0x00,0x01, /* 000006C8 ".G......" */
+ 0x02,0x22,0x04,0x00,0x79,0x00,0x5B,0x82, /* 000006D0 "."..y.[." */
+ 0x47,0x05,0x44,0x4D,0x41,0x30,0x08,0x5F, /* 000006D8 "G.DMA0._" */
+ 0x48,0x49,0x44,0x0C,0x41,0xD0,0x02,0x00, /* 000006E0 "HID.A..." */
+ 0x08,0x5F,0x43,0x52,0x53,0x11,0x41,0x04, /* 000006E8 "._CRS.A." */
+ 0x0A,0x3D,0x2A,0x10,0x04,0x47,0x01,0x00, /* 000006F0 ".=*..G.." */
+ 0x00,0x00,0x00,0x00,0x10,0x47,0x01,0x81, /* 000006F8 ".....G.." */
+ 0x00,0x81,0x00,0x00,0x03,0x47,0x01,0x87, /* 00000700 ".....G.." */
+ 0x00,0x87,0x00,0x00,0x01,0x47,0x01,0x89, /* 00000708 ".....G.." */
+ 0x00,0x89,0x00,0x00,0x03,0x47,0x01,0x8F, /* 00000710 ".....G.." */
+ 0x00,0x8F,0x00,0x00,0x01,0x47,0x01,0xC0, /* 00000718 ".....G.." */
+ 0x00,0xC0,0x00,0x00,0x20,0x47,0x01,0x80, /* 00000720 ".... G.." */
+ 0x04,0x80,0x04,0x00,0x10,0x79,0x00,0x5B, /* 00000728 ".....y.[" */
+ 0x82,0x25,0x54,0x4D,0x52,0x5F,0x08,0x5F, /* 00000730 ".%TMR_._" */
+ 0x48,0x49,0x44,0x0C,0x41,0xD0,0x01,0x00, /* 00000738 "HID.A..." */
+ 0x08,0x5F,0x43,0x52,0x53,0x11,0x10,0x0A, /* 00000740 "._CRS..." */
+ 0x0D,0x47,0x01,0x40,0x00,0x40,0x00,0x00, /* 00000748 ".G.@.@.." */
+ 0x04,0x22,0x01,0x00,0x79,0x00,0x5B,0x82, /* 00000750 "."..y.[." */
+ 0x25,0x52,0x54,0x43,0x5F,0x08,0x5F,0x48, /* 00000758 "%RTC_._H" */
+ 0x49,0x44,0x0C,0x41,0xD0,0x0B,0x00,0x08, /* 00000760 "ID.A...." */
+ 0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,0x0D, /* 00000768 "_CRS...." */
+ 0x47,0x01,0x70,0x00,0x70,0x00,0x00,0x02, /* 00000770 "G.p.p..." */
+ 0x22,0x00,0x01,0x79,0x00,0x5B,0x82,0x22, /* 00000778 ""..y.[."" */
+ 0x53,0x50,0x4B,0x52,0x08,0x5F,0x48,0x49, /* 00000780 "SPKR._HI" */
+ 0x44,0x0C,0x41,0xD0,0x08,0x00,0x08,0x5F, /* 00000788 "D.A...._" */
+ 0x43,0x52,0x53,0x11,0x0D,0x0A,0x0A,0x47, /* 00000790 "CRS....G" */
+ 0x01,0x61,0x00,0x61,0x00,0x00,0x01,0x79, /* 00000798 ".a.a...y" */
+ 0x00,0x5B,0x82,0x31,0x50,0x53,0x32,0x4D, /* 000007A0 ".[.1PS2M" */
+ 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 000007A8 "._HID.A." */
+ 0x0F,0x13,0x08,0x5F,0x43,0x49,0x44,0x0C, /* 000007B0 "..._CID." */
+ 0x41,0xD0,0x0F,0x13,0x14,0x09,0x5F,0x53, /* 000007B8 "A....._S" */
+ 0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F, /* 000007C0 "TA....._" */
+ 0x43,0x52,0x53,0x11,0x08,0x0A,0x05,0x22, /* 000007C8 "CRS...."" */
+ 0x00,0x10,0x79,0x00,0x5B,0x82,0x42,0x04, /* 000007D0 "..y.[.B." */
+ 0x50,0x53,0x32,0x4B,0x08,0x5F,0x48,0x49, /* 000007D8 "PS2K._HI" */
+ 0x44,0x0C,0x41,0xD0,0x03,0x03,0x08,0x5F, /* 000007E0 "D.A...._" */
+ 0x43,0x49,0x44,0x0C,0x41,0xD0,0x03,0x0B, /* 000007E8 "CID.A..." */
+ 0x14,0x09,0x5F,0x53,0x54,0x41,0x00,0xA4, /* 000007F0 ".._STA.." */
+ 0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,0x11, /* 000007F8 "..._CRS." */
+ 0x18,0x0A,0x15,0x47,0x01,0x60,0x00,0x60, /* 00000800 "...G.`.`" */
+ 0x00,0x00,0x01,0x47,0x01,0x64,0x00,0x64, /* 00000808 "...G.d.d" */
+ 0x00,0x00,0x01,0x22,0x02,0x00,0x79,0x00, /* 00000810 "..."..y." */
+ 0x5B,0x82,0x3A,0x46,0x44,0x43,0x30,0x08, /* 00000818 "[.:FDC0." */
+ 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x07, /* 00000820 "_HID.A.." */
+ 0x00,0x14,0x09,0x5F,0x53,0x54,0x41,0x00, /* 00000828 "..._STA." */
+ 0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53, /* 00000830 "...._CRS" */
+ 0x11,0x1B,0x0A,0x18,0x47,0x01,0xF0,0x03, /* 00000838 "....G..." */
+ 0xF0,0x03,0x01,0x06,0x47,0x01,0xF7,0x03, /* 00000840 "....G..." */
+ 0xF7,0x03,0x01,0x01,0x22,0x40,0x00,0x2A, /* 00000848 "...."@.*" */
+ 0x04,0x00,0x79,0x00,0x5B,0x82,0x35,0x55, /* 00000850 "..y.[.5U" */
+ 0x41,0x52,0x31,0x08,0x5F,0x48,0x49,0x44, /* 00000858 "AR1._HID" */
+ 0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55, /* 00000860 ".A...._U" */
+ 0x49,0x44,0x01,0x14,0x09,0x5F,0x53,0x54, /* 00000868 "ID..._ST" */
+ 0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43, /* 00000870 "A....._C" */
+ 0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01, /* 00000878 "RS....G." */
+ 0xF8,0x03,0xF8,0x03,0x01,0x08,0x22,0x10, /* 00000880 "......"." */
+ 0x00,0x79,0x00,0x5B,0x82,0x36,0x55,0x41, /* 00000888 ".y.[.6UA" */
+ 0x52,0x32,0x08,0x5F,0x48,0x49,0x44,0x0C, /* 00000890 "R2._HID." */
+ 0x41,0xD0,0x05,0x01,0x08,0x5F,0x55,0x49, /* 00000898 "A...._UI" */
+ 0x44,0x0A,0x02,0x14,0x09,0x5F,0x53,0x54, /* 000008A0 "D...._ST" */
+ 0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43, /* 000008A8 "A....._C" */
+ 0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01, /* 000008B0 "RS....G." */
+ 0xF8,0x02,0xF8,0x02,0x01,0x08,0x22,0x08, /* 000008B8 "......"." */
+ 0x00,0x79,0x00,
};
int DsdtLen=sizeof(AmlCode);
diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi_fadt.h
--- a/tools/firmware/acpi/acpi_fadt.h Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/firmware/acpi/acpi_fadt.h Fri Jun 23 15:33:25 2006 -0600
@@ -22,19 +22,19 @@
// FADT Definitions, see ACPI 2.0 specification for details.
//
-#define ACPI_OEM_FADT_REVISION 0x00000000 // TBD
+#define ACPI_OEM_FADT_REVISION 0x00000001 // TBD
-#define ACPI_PREFERRED_PM_PROFILE 0x04
+#define ACPI_PREFERRED_PM_PROFILE 0x00
#define ACPI_SCI_INT 0x0009
-#define ACPI_SMI_CMD 0x000000B2
+#define ACPI_SMI_CMD 0x00000000
#define ACPI_ACPI_ENABLE 0x00
#define ACPI_ACPI_DISABLE 0x00
#define ACPI_S4_BIOS_REQ 0x00
#define ACPI_PSTATE_CNT 0x00
-#define ACPI_GPE1_BASE 0x20
+#define ACPI_GPE1_BASE 0x00
#define ACPI_CST_CNT 0x00
-#define ACPI_P_LVL2_LAT 0x0065
-#define ACPI_P_LVL3_LAT 0X03E9
+#define ACPI_P_LVL2_LAT 0x0064
+#define ACPI_P_LVL3_LAT 0X03E8
#define ACPI_FLUSH_SIZE 0x00
#define ACPI_FLUSH_STRIDE 0x00
#define ACPI_DUTY_OFFSET 0x01
@@ -51,15 +51,16 @@
//
// Fixed Feature Flags
//
-#define ACPI_FIXED_FEATURE_FLAGS (ACPI_SLP_BUTTON| ACPI_WBINVD )
+#define ACPI_FIXED_FEATURE_FLAGS (ACPI_PROC_C1|ACPI_SLP_BUTTON|ACPI_WBINVD|ACPI_PWR_BUTTON|ACPI_FIX_RTC)
//
// PM1A Event Register Block Generic Address Information
//
#define ACPI_PM1A_EVT_BLK_ADDRESS_SPACE_ID ACPI_SYSTEM_IO
-#define ACPI_PM1A_EVT_BLK_BIT_WIDTH 0x00
+#define ACPI_PM1A_EVT_BLK_BIT_WIDTH 0x20
#define ACPI_PM1A_EVT_BLK_BIT_OFFSET 0x00
-#define ACPI_PM1A_EVT_BLK_ADDRESS 0x0000000000008000
+//#define ACPI_PM1A_EVT_BLK_ADDRESS 0x000000000000c010
+#define ACPI_PM1A_EVT_BLK_ADDRESS 0x000000000000c040
//
// PM1B Event Register Block Generic Address Information
@@ -73,7 +74,7 @@
// PM1A Control Register Block Generic Address Information
//
#define ACPI_PM1A_CNT_BLK_ADDRESS_SPACE_ID ACPI_SYSTEM_IO
-#define ACPI_PM1A_CNT_BLK_BIT_WIDTH 0x08
+#define ACPI_PM1A_CNT_BLK_BIT_WIDTH 0x10
#define ACPI_PM1A_CNT_BLK_BIT_OFFSET 0x00
#define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/ioemu/hw/pc.c Fri Jun 23 15:33:25 2006 -0600
@@ -375,7 +375,9 @@ static int serial_io[MAX_SERIAL_PORTS] =
static int serial_io[MAX_SERIAL_PORTS] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
static int serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 };
-extern int acpi_init(unsigned int base);
+//extern int acpi_init(unsigned int base);
+/* PIIX4 acpi pci configuration space, func 3 */
+extern void pci_piix4_acpi_init(PCIBus *bus);
#define NOBIOS 1
@@ -583,7 +585,9 @@ void pc_init(uint64_t ram_size, int vga_
floppy_controller = fdctrl_init(6, 2, 0, 0x3f0, fd_table);
cmos_init(ram_size, boot_device, bs_table, timeoffset);
- acpi_init(0x8000);
+// using PIIX4 acpi model
+// acpi_init(0x8000);
+ pci_piix4_acpi_init(pci_bus);
if (pci_enabled && usb_enabled) {
usb_uhci_init(pci_bus, usb_root_ports);
diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/ioemu/hw/pci.c Fri Jun 23 15:33:25 2006 -0600
@@ -1394,7 +1394,7 @@ static uint32_t pci_bios_io_addr;
static uint32_t pci_bios_io_addr;
static uint32_t pci_bios_mem_addr;
/* host irqs corresponding to PCI irqs A-D */
-static uint8_t pci_irqs[4] = { 11, 9, 11, 9 };
+static uint8_t pci_irqs[4] = { 10, 11, 10, 11 };
static void pci_set_io_region_addr(PCIDevice *d, int region_num, uint32_t addr)
{
@@ -1447,12 +1447,22 @@ static void pci_bios_init_device(PCIDevi
pci_set_io_region_addr(d, 3, 0x374);
}
break;
+ case 0x0680:
+ if (vendor_id == 0x8086 && device_id == 0x7113) {
+ // PIIX4 ACPI PM
+ pci_config_writew(d, 0x20, 0x0000); // NO smb bus IO enable in PIIX4
+ pci_config_writew(d, 0x22, 0x0000);
+ goto default_map;
+ }
+ break;
+
case 0x0300:
if (vendor_id != 0x1234)
goto default_map;
/* VGA: map frame buffer to default Bochs VBE address */
pci_set_io_region_addr(d, 0, 0xE0000000);
break;
+
case 0x0800:
/* PIC */
vendor_id = pci_config_readw(d, PCI_VENDOR_ID);
@@ -1497,6 +1507,13 @@ static void pci_bios_init_device(PCIDevi
pic_irq = pci_irqs[pin];
pci_config_writeb(d, PCI_INTERRUPT_LINE, pic_irq);
}
+ if (class== 0x0680&& vendor_id == 0x8086 && device_id == 0x7113) {
+ // PIIX4 ACPI PM
+ pci_config_writew(d, 0x20, 0x0000); // NO smb bus IO enable in PIIX4
+ pci_config_writew(d, 0x22, 0x0000);
+ pci_config_writew(d, 0x3c, 0x0009); // Hardcoded IRQ9
+ pci_config_writew(d, 0x3d, 0x0001);
+ }
}
/*
diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/target-i386-dm/Makefile
--- a/tools/ioemu/target-i386-dm/Makefile Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/ioemu/target-i386-dm/Makefile Fri Jun 23 15:33:25 2006 -0600
@@ -281,7 +281,7 @@ VL_OBJS+= usb.o usb-hub.o usb-uhci.o usb
# Hardware support
VL_OBJS+= ide.o ne2000.o pckbd.o vga.o dma.o
VL_OBJS+= fdc.o mc146818rtc.o serial.o i8259_stub.o pc.o port-e9.o
-VL_OBJS+= cirrus_vga.o pcnet.o acpi.o
+VL_OBJS+= cirrus_vga.o pcnet.o piix4acpi.o
VL_OBJS+= $(SOUND_HW) $(AUDIODRV) mixeng.o
ifeq ($(TARGET_ARCH), ppc)
diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/libxc/xc_domain.c Fri Jun 23 15:33:25 2006 -0600
@@ -283,6 +283,17 @@ int xc_domain_setmaxmem(int xc_handle,
op.cmd = DOM0_SETDOMAINMAXMEM;
op.u.setdomainmaxmem.domain = (domid_t)domid;
op.u.setdomainmaxmem.max_memkb = max_memkb;
+ return do_dom0_op(xc_handle, &op);
+}
+
+/*
+ * Set the time offset of domain `domid` to `time_offset_seconds`.
+ *
+ * Fills in a DOM0_SETTIMEOFFSET dom0 op with the domain ID and the offset
+ * and submits it via do_dom0_op(), whose return value is passed straight
+ * back to the caller.
+ */
+int xc_domain_set_time_offset(int xc_handle,
+ uint32_t domid,
+ int32_t time_offset_seconds)
+{
+ DECLARE_DOM0_OP;
+ op.cmd = DOM0_SETTIMEOFFSET;
+ op.u.settimeoffset.domain = (domid_t)domid;
+ op.u.settimeoffset.time_offset_seconds = time_offset_seconds;
return do_dom0_op(xc_handle, &op);
}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/libxc/xc_linux_restore.c Fri Jun 23 15:33:25 2006 -0600
@@ -572,42 +572,48 @@ int xc_linux_restore(int xc_handle, int
nr_pins = 0;
for (i = 0; i < max_pfn; i++) {
- if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) {
+ if ( (pfn_type[i] & LPINTAB) == 0 )
+ continue;
+
+ switch (pfn_type[i]) {
+
+ case (L1TAB|LPINTAB):
+ pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+ break;
+
+ case (L2TAB|LPINTAB):
+ pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+ break;
+
+ case (L3TAB|LPINTAB):
+ pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+ break;
+
+ case (L4TAB|LPINTAB):
+ pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+ break;
+
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH) {
if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
ERR("Failed to pin batch of %d page tables", nr_pins);
goto out;
}
nr_pins = 0;
}
-
- if ( (pfn_type[i] & LPINTAB) == 0 )
- continue;
-
- switch(pfn_type[i]) {
-
- case (L1TAB|LPINTAB):
- pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
- break;
-
- case (L2TAB|LPINTAB):
- pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
- break;
-
- case (L3TAB|LPINTAB):
- pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
- break;
-
- case (L4TAB|LPINTAB):
- pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
- break;
-
- default:
- continue;
- }
-
- pin[nr_pins].arg1.mfn = p2m[i];
- nr_pins++;
-
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
+ ERR("Failed to pin batch of %d page tables", nr_pins);
+ goto out;
}
DPRINTF("\b\b\b\b100%%\n");
diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/libxc/xc_linux_save.c Fri Jun 23 15:33:25 2006 -0600
@@ -91,12 +91,12 @@ static inline int test_bit (int nr, vola
static inline void clear_bit (int nr, volatile void * addr)
{
- BITMAP_ENTRY(nr, addr) &= ~(1 << BITMAP_SHIFT(nr));
+ BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
}
static inline void set_bit ( int nr, volatile void * addr)
{
- BITMAP_ENTRY(nr, addr) |= (1 << BITMAP_SHIFT(nr));
+ BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
}
/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/libxc/xenctrl.h Fri Jun 23 15:33:25 2006 -0600
@@ -410,6 +410,10 @@ int xc_domain_setmaxmem(int xc_handle,
uint32_t domid,
unsigned int max_memkb);
+int xc_domain_set_time_offset(int xc_handle,
+ uint32_t domid,
+ int32_t time_offset_seconds);
+
int xc_domain_memory_increase_reservation(int xc_handle,
uint32_t domid,
unsigned long nr_extents,
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Jun 23 15:33:25 2006 -0600
@@ -869,6 +869,30 @@ static PyObject *pyxc_domain_iomem_permi
return zero;
}
+/*
+ * Python binding: set a domain's time offset to dom0's local-time offset.
+ *
+ * args is a tuple holding one integer, the domain ID.  The offset is
+ * computed as the difference between the current calendar time and the
+ * result of mktime() applied to the broken-down UTC time (with the local
+ * DST flag copied in), then handed to xc_domain_set_time_offset().
+ *
+ * Returns a new reference to `zero` on success.  On failure returns NULL
+ * with a Python exception set.
+ */
+static PyObject *pyxc_domain_set_time_offset(XcObject *self, PyObject *args)
+{
+    uint32_t dom;
+    int32_t time_offset_seconds;
+    time_t calendar_time;
+    struct tm local_time;
+    struct tm utc_time;
+
+    if (!PyArg_ParseTuple(args, "i", &dom))
+        return NULL;
+
+    calendar_time = time(NULL);
+    localtime_r(&calendar_time, &local_time);
+    gmtime_r(&calendar_time, &utc_time);
+    /* set up to get calendar time based on utc_time, with local dst setting */
+    utc_time.tm_isdst = local_time.tm_isdst;
+    time_offset_seconds = (int32_t)difftime(calendar_time, mktime(&utc_time));
+
+    /*
+     * Returning NULL without an exception set makes the interpreter raise
+     * SystemError, so set one explicitly.
+     * NOTE(review): if this module has its own exception object for libxc
+     * failures, use that instead of PyExc_OSError -- not visible in this hunk.
+     */
+    if (xc_domain_set_time_offset(self->xc_handle, dom, time_offset_seconds) != 0)
+        return PyErr_SetFromErrno(PyExc_OSError);
+
+    Py_INCREF(zero);
+    return zero;
+}
static PyObject *dom_op(XcObject *self, PyObject *args,
int (*fn)(int, uint32_t))
@@ -1207,6 +1231,13 @@ static PyMethodDef pyxc_methods[] = {
METH_VARARGS, "\n"
"Returns: [int]: The size in KiB of memory spanning the given number "
"of pages.\n" },
+
+ { "domain_set_time_offset",
+ (PyCFunction)pyxc_domain_set_time_offset,
+ METH_VARARGS, "\n"
+ "Set a domain's time offset to Dom0's localtime\n"
+ " dom [int]: Domain whose time offset is being set.\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
{ NULL, NULL, 0, NULL }
};
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/util/xmlrpclib2.py
--- a/tools/python/xen/util/xmlrpclib2.py Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/util/xmlrpclib2.py Fri Jun 23 15:33:25 2006 -0600
@@ -13,7 +13,7 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2006 Anthony Liguori <aliguori@xxxxxxxxxx>
-# Copyright (C) 2006 XenSource Ltd.
+# Copyright (C) 2006 XenSource Inc.
#============================================================================
"""
@@ -26,11 +26,18 @@ from httplib import HTTPConnection, HTTP
from httplib import HTTPConnection, HTTP
from xmlrpclib import Transport
from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
+import SocketServer
import xmlrpclib, socket, os, stat
-import SocketServer
-import xen.xend.XendClient
from xen.xend.XendLogging import log
+
+try:
+ import SSHTransport
+ ssh_enabled = True
+except ImportError:
+ # SSHTransport is disabled on Python <2.4, because it uses the subprocess
+ # package.
+ ssh_enabled = False
# A new ServerProxy that also supports httpu urls. An http URL comes in the
@@ -39,6 +46,31 @@ from xen.xend.XendLogging import log
# httpu:///absolute/path/to/socket.sock
#
# It assumes that the RPC handler is /RPC2. This probably needs to be improved
+
+# We're forced to subclass the RequestHandler class so that we can work around
+# some bugs in Keep-Alive handling and also enable it by default
+class XMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
+ # Advertise HTTP/1.1; presumably this is what enables Keep-Alive by default
+ # (see the comment above the class).
+ protocol_version = "HTTP/1.1"
+
+ # this is inspired by SimpleXMLRPCRequestHandler's do_POST but differs
+ # in a few non-trivial ways
+ # 1) we never generate internal server errors. We let the exception
+ # propagate so that it shows up in the Xend debug logs
+ # 2) we don't bother checking for a _dispatch function since we don't
+ # use one
+ def do_POST(self):
+ # Read the request body, sized by the Content-Length header.
+ data = self.rfile.read(int(self.headers["content-length"]))
+ # Exceptions raised by the dispatcher are deliberately not caught here.
+ rsp = self.server._marshaled_dispatch(data)
+
+ self.send_response(200)
+ self.send_header("Content-Type", "text/xml")
+ self.send_header("Content-Length", str(len(rsp)))
+ self.end_headers()
+
+ self.wfile.write(rsp)
+ self.wfile.flush()
+ # NOTE(review): shutdown(1) presumably closes the sending side of the
+ # socket when the connection is not being kept alive -- confirm.
+ if self.close_connection == 1:
+ self.connection.shutdown(1)
class HTTPUnixConnection(HTTPConnection):
def connect(self):
@@ -75,9 +107,15 @@ class ServerProxy(xmlrpclib.ServerProxy)
if protocol == 'httpu':
uri = 'http:' + rest
transport = UnixTransport()
+ elif protocol == 'ssh':
+ global ssh_enabled
+ if ssh_enabled:
+ (transport, uri) = SSHTransport.getHTTPURI(uri)
+ else:
+ raise ValueError(
+ "SSH transport not supported on Python <2.4.")
xmlrpclib.ServerProxy.__init__(self, uri, transport, encoding,
verbose, allow_none)
-
def __request(self, methodname, params):
response = xmlrpclib.ServerProxy.__request(self, methodname, params)
@@ -93,6 +131,10 @@ class ServerProxy(xmlrpclib.ServerProxy)
class TCPXMLRPCServer(SocketServer.ThreadingMixIn, SimpleXMLRPCServer):
allow_reuse_address = True
+
+ # Delegates to SimpleXMLRPCServer.__init__; the only difference visible
+ # here is that requestHandler defaults to XMLRPCRequestHandler.
+ def __init__(self, addr, requestHandler=XMLRPCRequestHandler,
+ logRequests=1):
+ SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests)
def _marshaled_dispatch(self, data, dispatch_method = None):
params, method = xmlrpclib.loads(data)
@@ -121,6 +163,7 @@ class TCPXMLRPCServer(SocketServer.Threa
except xmlrpclib.Fault, fault:
response = xmlrpclib.dumps(fault)
except Exception, exn:
+ import xen.xend.XendClient
log.exception(exn)
response = xmlrpclib.dumps(
xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn)))
@@ -131,10 +174,10 @@ class TCPXMLRPCServer(SocketServer.Threa
# It implements proper support for allow_reuse_address by
# unlink()'ing an existing socket.
-class UnixXMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
+class UnixXMLRPCRequestHandler(XMLRPCRequestHandler):
def address_string(self):
try:
- return SimpleXMLRPCRequestHandler.address_string(self)
+ return XMLRPCRequestHandler.address_string(self)
except ValueError, e:
return self.client_address[:2]
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xend/XendClient.py
--- a/tools/python/xen/xend/XendClient.py Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/xend/XendClient.py Fri Jun 23 15:33:25 2006 -0600
@@ -18,6 +18,8 @@
#============================================================================
from xen.util.xmlrpclib2 import ServerProxy
+import os
+import sys
XML_RPC_SOCKET = "/var/run/xend/xmlrpc.sock"
@@ -25,4 +27,13 @@ ERROR_GENERIC = 2
ERROR_GENERIC = 2
ERROR_INVALID_DOMAIN = 3
-server = ServerProxy('httpu:///var/run/xend/xmlrpc.sock')
+# Default to the local Xend Unix socket; the XM_SERVER environment variable,
+# when set, overrides the URI.
+uri = 'httpu:///var/run/xend/xmlrpc.sock'
+if os.environ.has_key('XM_SERVER'):
+ uri = os.environ['XM_SERVER']
+
+# A ValueError from ServerProxy is printed to stderr and the process exits
+# with status 1.
+try:
+ server = ServerProxy(uri)
+except ValueError, exn:
+ print >>sys.stderr, exn
+ sys.exit(1)
+ sys.exit(1)
+
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py Fri Jun 23 15:33:25 2006 -0600
@@ -135,6 +135,7 @@ ROUNDTRIPPING_CONFIG_ENTRIES = [
('bootloader', str),
('bootloader_args', str),
('features', str),
+ ('localtime', int),
]
ROUNDTRIPPING_CONFIG_ENTRIES += VM_CONFIG_PARAMS
@@ -1259,6 +1260,10 @@ class XendDomainInfo:
self.image = image.create(self,
self.info['image'],
self.info['device'])
+
+ localtime = self.info['localtime']
+ if localtime is not None and localtime == 1:
+ xc.domain_set_time_offset(self.domid)
xc.domain_setcpuweight(self.domid, self.info['cpu_weight'])
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/xm/create.py Fri Jun 23 15:33:25 2006 -0600
@@ -672,6 +672,8 @@ def make_config(vals):
config.append(['backend', ['netif']])
if vals.tpmif:
config.append(['backend', ['tpmif']])
+ if vals.localtime:
+ config.append(['localtime', vals.localtime])
config_image = configure_image(vals)
if vals.bootloader:
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/python/xen/xm/main.py Fri Jun 23 15:33:25 2006 -0600
@@ -41,6 +41,7 @@ import xen.xend.XendClient
import xen.xend.XendClient
from xen.xend.XendClient import server
from xen.util import security
+from select import select
# getopt.gnu_getopt is better, but only exists in Python 2.3+. Use
# getopt.getopt if gnu_getopt is not available. This will mean that options
@@ -124,6 +125,7 @@ loadpolicy_help = "loadpolicy <policy>
loadpolicy_help = "loadpolicy <policy> Load binary policy into hypervisor"
makepolicy_help = "makepolicy <policy> Build policy and create .bin/.map files"
labels_help = "labels [policy] [type=DOM|..] List <type> labels for (active) policy."
+serve_help = "serve Proxy Xend XML-RPC over stdio"
short_command_list = [
"console",
@@ -171,7 +173,8 @@ host_commands = [
host_commands = [
"dmesg",
"info",
- "log"
+ "log",
+ "serve",
]
scheduler_commands = [
@@ -273,7 +276,7 @@ for command in all_commands:
####################################################################
def arg_check(args, name, lo, hi = -1):
- n = len(args)
+ n = len([i for i in args if i != '--'])
if hi == -1:
if n != lo:
@@ -833,6 +836,32 @@ def xm_log(args):
arg_check(args, "log", 0)
print server.xend.node.log()
+
+# "xm serve": proxy Xend XML-RPC over stdio. Connects to Xend's Unix-domain
+# XML-RPC socket and relays bytes between that socket and stdin/stdout.
+# NOTE(review): uses `socket`, `os` and `sys`, which must be imported at
+# module level -- those imports are not visible in this hunk; confirm.
+def xm_serve(args):
+ arg_check(args, "serve", 0)
+
+ from fcntl import fcntl, F_SETFL
+
+ s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ s.connect(xen.xend.XendClient.XML_RPC_SOCKET)
+ # Set O_NONBLOCK on stdin. F_SETFL is passed O_NONBLOCK alone, so any
+ # other settable status flag is not preserved.
+ fcntl(sys.stdin, F_SETFL, os.O_NONBLOCK)
+
+ while True:
+ # Block until stdin or the Xend socket is readable.
+ iwtd, owtd, ewtd = select([sys.stdin, s], [], [])
+ if s in iwtd:
+ # Xend -> stdout; presumably an empty recv ends the loop.
+ data = s.recv(4096)
+ if len(data) > 0:
+ sys.stdout.write(data)
+ sys.stdout.flush()
+ else:
+ break
+ if sys.stdin in iwtd:
+ # stdin -> Xend; presumably an empty read ends the loop.
+ data = sys.stdin.read(4096)
+ if len(data) > 0:
+ s.sendall(data)
+ else:
+ break
+ s.close()
def parse_dev_info(info):
def get_info(n, t, d):
@@ -1072,6 +1101,7 @@ commands = {
"dmesg": xm_dmesg,
"info": xm_info,
"log": xm_log,
+ "serve": xm_serve,
# scheduler
"sched-bvt": xm_sched_bvt,
"sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
diff -r 59d4c1863330 -r fdf25330e4a6 tools/security/secpol_tool.c
--- a/tools/security/secpol_tool.c Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/security/secpol_tool.c Fri Jun 23 15:33:25 2006 -0600
@@ -229,6 +229,7 @@ void acm_dump_policy_buffer(void *buf, i
#define PULL_CACHE_SIZE 8192
uint8_t pull_buffer[PULL_CACHE_SIZE];
+
int acm_domain_getpolicy(int xc_handle)
{
struct acm_getpolicy getpolicy;
@@ -236,7 +237,7 @@ int acm_domain_getpolicy(int xc_handle)
memset(pull_buffer, 0x00, sizeof(pull_buffer));
getpolicy.interface_version = ACM_INTERFACE_VERSION;
- getpolicy.pullcache = (void *) pull_buffer;
+ set_xen_guest_handle(getpolicy.pullcache, pull_buffer);
getpolicy.pullcache_size = sizeof(pull_buffer);
ret = xc_acm_op(xc_handle, ACMOP_getpolicy, &getpolicy, sizeof(getpolicy));
@@ -281,7 +282,7 @@ int acm_domain_loadpolicy(int xc_handle,
/* dump it and then push it down into xen/acm */
acm_dump_policy_buffer(buffer, len);
setpolicy.interface_version = ACM_INTERFACE_VERSION;
- setpolicy.pushcache = (void *) buffer;
+ set_xen_guest_handle(setpolicy.pushcache, buffer);
setpolicy.pushcache_size = len;
ret = xc_acm_op(xc_handle, ACMOP_setpolicy, &setpolicy,
sizeof(setpolicy));
@@ -330,7 +331,7 @@ int acm_domain_dumpstats(int xc_handle)
memset(stats_buffer, 0x00, sizeof(stats_buffer));
dumpstats.interface_version = ACM_INTERFACE_VERSION;
- dumpstats.pullcache = (void *) stats_buffer;
+ set_xen_guest_handle(dumpstats.pullcache, stats_buffer);
dumpstats.pullcache_size = sizeof(stats_buffer);
ret = xc_acm_op(xc_handle, ACMOP_dumpstats, &dumpstats, sizeof(dumpstats));
diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/grouptest/default
--- a/tools/xm-test/grouptest/default Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/xm-test/grouptest/default Fri Jun 23 15:33:25 2006 -0600
@@ -21,7 +21,7 @@ reboot
reboot
restore
save
-sedf
+sched-credit
shutdown
sysrq
unpause
diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/grouptest/medium
--- a/tools/xm-test/grouptest/medium Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/xm-test/grouptest/medium Fri Jun 23 15:33:25 2006 -0600
@@ -16,7 +16,7 @@ reboot
reboot
restore 02_restore_badparm_neg.test 03_restore_badfilename_neg.test
04_restore_withdevices_pos.test
save
-sedf
+sched-credit
shutdown
sysrq 01_sysrq_basic_neg.test 02_sysrq_sync_pos.test
unpause
diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/lib/XmTestLib/Console.py
--- a/tools/xm-test/lib/XmTestLib/Console.py Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/xm-test/lib/XmTestLib/Console.py Fri Jun 23 15:33:25 2006 -0600
@@ -82,9 +82,6 @@ class XmConsole:
tty.setraw(self.consoleFd, termios.TCSANOW)
- self.__chewall(self.consoleFd)
-
-
def __addToHistory(self, line):
self.historyBuffer.append(line)
self.historyLines += 1
@@ -120,34 +117,47 @@ class XmConsole:
output"""
self.PROMPT = prompt
-
- def __chewall(self, fd):
+ def __getprompt(self, fd):
timeout = 0
- bytes = 0
-
- while timeout < 3:
- i, o, e = select.select([fd], [], [], 1)
- if fd in i:
- try:
- foo = os.read(fd, 1)
- if self.debugMe:
- sys.stdout.write(foo)
- bytes += 1
- except Exception, exn:
- raise ConsoleError(str(exn))
-
- else:
- timeout += 1
-
- if self.limit and bytes >= self.limit:
+ bytes = 0
+ while timeout < 180:
+ # eat anything while total bytes less than limit else raise RUNAWAY
+ while (not self.limit) or (bytes < self.limit):
+ i, o, e = select.select([fd], [], [], 1)
+ if fd in i:
+ try:
+ foo = os.read(fd, 1)
+ if self.debugMe:
+ sys.stdout.write(foo)
+ bytes += 1
+ except Exception, exn:
+ raise ConsoleError(str(exn))
+ else:
+ break
+ else:
raise ConsoleError("Console run-away (exceeded %i bytes)"
% self.limit, RUNAWAY)
-
- if self.debugMe:
- print "Ignored %i bytes of miscellaneous console output" % bytes
-
- return bytes
-
+ # press enter
+ os.write(self.consoleFd, "\n")
+ # look for prompt
+ for prompt_char in "\r\n" + self.PROMPT:
+ i, o, e = select.select([fd], [], [], 1)
+ if fd in i:
+ try:
+ foo = os.read(fd, 1)
+ if self.debugMe:
+ sys.stdout.write(foo)
+ if foo != prompt_char:
+ break
+ except Exception, exn:
+ raise ConsoleError(str(exn))
+ else:
+ timeout += 1
+ break
+ else:
+ break
+ else:
+ raise ConsoleError("Timed out waiting for console prompt")
def __runCmd(self, command, saveHistory=True):
output = ""
@@ -155,7 +165,7 @@ class XmConsole:
lines = 0
bytes = 0
- self.__chewall(self.consoleFd)
+ self.__getprompt(self.consoleFd)
if verbose:
print "[%s] Sending `%s'" % (self.domain, command)
@@ -176,7 +186,7 @@ class XmConsole:
"Failed to read from console (fd=%i): %s" %
(self.consoleFd, exn))
else:
- raise ConsoleError("Timed out waiting for console")
+ raise ConsoleError("Timed out waiting for console command")
if self.limit and bytes >= self.limit:
raise ConsoleError("Console run-away (exceeded %i bytes)"
diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/tests/memset/03_memset_random_pos.py
--- a/tools/xm-test/tests/memset/03_memset_random_pos.py Fri Jun 23 15:26:01 2006 -0600
+++ b/tools/xm-test/tests/memset/03_memset_random_pos.py Fri Jun 23 15:33:25 2006 -0600
@@ -22,12 +22,6 @@ except DomainError, e:
FAIL(str(e))
times = random.randint(10,50)
-
-try:
- console = XmConsole(domain.getName())
- console.sendInput("input")
-except ConsoleError, e:
- FAIL(str(e))
try:
run = console.runCmd("cat /proc/xen/balloon | grep Current");
diff -r 59d4c1863330 -r fdf25330e4a6 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/acm/acm_core.c Fri Jun 23 15:33:25 2006 -0600
@@ -222,9 +222,8 @@ acm_setup(unsigned int *initrdidx,
pol = (struct acm_policy_buffer *)_policy_start;
if (ntohl(pol->magic) == ACM_MAGIC)
{
- rc = acm_set_policy((void *)_policy_start,
- (u32)_policy_len,
- 0);
+ rc = do_acm_set_policy((void *)_policy_start,
+ (u32)_policy_len);
if (rc == ACM_OK)
{
printkd("Policy len 0x%lx, start at
%p.\n",_policy_len,_policy_start);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/acm/acm_policy.c
--- a/xen/acm/acm_policy.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/acm/acm_policy.c Fri Jun 23 15:33:25 2006 -0600
@@ -26,36 +26,43 @@
#include <xen/lib.h>
#include <xen/delay.h>
#include <xen/sched.h>
+#include <xen/guest_access.h>
#include <acm/acm_core.h>
#include <public/acm_ops.h>
#include <acm/acm_hooks.h>
#include <acm/acm_endian.h>
int
-acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size, int isuserbuffer)
+acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size)
{
u8 *policy_buffer = NULL;
- struct acm_policy_buffer *pol;
+ int ret = -EFAULT;
if (buf_size < sizeof(struct acm_policy_buffer))
return -EFAULT;
- /* 1. copy buffer from domain */
+ /* copy buffer from guest domain */
if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL)
return -ENOMEM;
- if (isuserbuffer) {
- if (copy_from_guest(policy_buffer, buf, buf_size))
- {
- printk("%s: Error copying!\n",__func__);
- goto error_free;
- }
- } else
- memcpy(policy_buffer, buf, buf_size);
-
- /* 2. some sanity checking */
- pol = (struct acm_policy_buffer *)policy_buffer;
-
+ if (copy_from_guest(policy_buffer, buf, buf_size))
+ {
+ printk("%s: Error copying!\n",__func__);
+ goto error_free;
+ }
+ ret = do_acm_set_policy(policy_buffer, buf_size);
+
+ error_free:
+ xfree(policy_buffer);
+ return ret;
+}
+
+
+int
+do_acm_set_policy(void *buf, u32 buf_size)
+{
+ struct acm_policy_buffer *pol = (struct acm_policy_buffer *)buf;
+ /* some sanity checking */
if ((ntohl(pol->magic) != ACM_MAGIC) ||
(buf_size != ntohl(pol->len)) ||
(ntohl(pol->policy_version) != ACM_POLICY_VERSION))
@@ -85,33 +92,31 @@ acm_set_policy(XEN_GUEST_HANDLE(void) bu
/* get bin_policy lock and rewrite policy (release old one) */
write_lock(&acm_bin_pol_rwlock);
- /* 3. set label reference name */
+ /* set label reference name */
if (acm_set_policy_reference(buf + ntohl(pol->policy_reference_offset),
ntohl(pol->primary_buffer_offset) -
ntohl(pol->policy_reference_offset)))
goto error_lock_free;
- /* 4. set primary policy data */
+ /* set primary policy data */
if (acm_primary_ops->set_binary_policy(buf +
ntohl(pol->primary_buffer_offset),
ntohl(pol->secondary_buffer_offset)
-
ntohl(pol->primary_buffer_offset)))
goto error_lock_free;
- /* 5. set secondary policy data */
+ /* set secondary policy data */
if (acm_secondary_ops->set_binary_policy(buf +
ntohl(pol->secondary_buffer_offset),
ntohl(pol->len) -
ntohl(pol->secondary_buffer_offset)))
goto error_lock_free;
write_unlock(&acm_bin_pol_rwlock);
- xfree(policy_buffer);
return ACM_OK;
error_lock_free:
write_unlock(&acm_bin_pol_rwlock);
error_free:
printk("%s: Error setting policy.\n", __func__);
- xfree(policy_buffer);
return -EFAULT;
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/ia64/linux-xen/smp.c
--- a/xen/arch/ia64/linux-xen/smp.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/ia64/linux-xen/smp.c Fri Jun 23 15:33:25 2006 -0600
@@ -421,6 +421,42 @@ smp_call_function (void (*func) (void *i
}
EXPORT_SYMBOL(smp_call_function);
+#ifdef XEN
+int
+on_selected_cpus(cpumask_t selected, void (*func) (void *info), void *info,
+ int retry, int wait)
+{
+ struct call_data_struct data;
+ unsigned int cpu, nr_cpus = cpus_weight(selected);
+
+ ASSERT(local_irq_is_enabled());
+
+ if (!nr_cpus)
+ return 0;
+
+ data.func = func;
+ data.info = info;
+ data.wait = wait;
+ atomic_set(&data.started, 0);
+ atomic_set(&data.finished, 0);
+
+ spin_lock(&call_lock);
+
+ call_data = &data;
+ wmb();
+
+ for_each_cpu_mask(cpu, selected)
+ send_IPI_single(cpu, IPI_CALL_FUNC);
+
+ while (atomic_read(wait ? &data.finished : &data.started) != nr_cpus)
+ cpu_relax();
+
+ spin_unlock(&call_lock);
+
+ return 0;
+}
+#endif
+
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/ia64/xen/domain.c Fri Jun 23 15:33:25 2006 -0600
@@ -895,9 +895,7 @@ int construct_dom0(struct domain *d,
sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
si->nr_pages = max_pages;
- /* Give up the VGA console if DOM0 is configured to grab it. */
- if (cmdline != NULL)
- console_endboot(strstr(cmdline, "tty0") != NULL);
+ console_endboot();
printk("Dom0: 0x%lx\n", (u64)dom0);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/ia64/xen/xensetup.c Fri Jun 23 15:33:25 2006 -0600
@@ -532,9 +532,8 @@ printk("num_online_cpus=%d, max_cpus=%d\
init_trace_bufs();
- /* Give up the VGA console if DOM0 is configured to grab it. */
if (opt_xencons)
- console_endboot(cmdline && strstr(cmdline, "tty0"));
+ console_endboot();
domain0_ready = 1;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/Makefile Fri Jun 23 15:33:25 2006 -0600
@@ -41,7 +41,7 @@ obj-y += x86_emulate.o
obj-y += x86_emulate.o
ifneq ($(pae),n)
-obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o
+obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o
else
obj-$(x86_32) += shadow32.o
endif
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/audit.c Fri Jun 23 15:33:25 2006 -0600
@@ -923,8 +923,8 @@ void _audit_domain(struct domain *d, int
d->domain_id, page_to_mfn(page),
page->u.inuse.type_info,
page->count_info);
- printk("a->gpfn_and_flags=%p\n",
- (void *)a->gpfn_and_flags);
+ printk("a->gpfn_and_flags=%"PRIx64"\n",
+ (u64)a->gpfn_and_flags);
errors++;
}
break;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Fri Jun 23 15:33:25 2006 -0600
@@ -74,12 +74,15 @@ static void __vmx_clear_vmcs(void *info)
static void vmx_clear_vmcs(struct vcpu *v)
{
- unsigned int cpu = v->arch.hvm_vmx.active_cpu;
-
- if ( (cpu == -1) || (cpu == smp_processor_id()) )
- __vmx_clear_vmcs(v);
- else
- on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1);
+ int cpu = v->arch.hvm_vmx.active_cpu;
+
+ if ( cpu == -1 )
+ return;
+
+ if ( cpu == smp_processor_id() )
+ return __vmx_clear_vmcs(v);
+
+ on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1);
}
static void vmx_load_vmcs(struct vcpu *v)
@@ -97,6 +100,8 @@ void vmx_vmcs_enter(struct vcpu *v)
* context initialisation.
* 2. VMPTRLD as soon as we context-switch to a HVM VCPU.
* 3. VMCS destruction needs to happen later (from domain_destroy()).
+ * We can relax this a bit if a paused VCPU always commits its
+ * architectural state to a software structure.
*/
if ( v == current )
return;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Jun 23 15:33:25 2006 -0600
@@ -1623,7 +1623,7 @@ static int mov_to_cr(int gp, int cr, str
if ( vmx_pgbit_test(v) )
{
/* The guest is a 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
unsigned long mfn, old_base_mfn;
if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
@@ -1667,7 +1667,7 @@ static int mov_to_cr(int gp, int cr, str
else
{
/* The guest is a 64 bit or 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
if ( (v->domain->arch.ops != NULL) &&
v->domain->arch.ops->guest_paging_levels == PAGING_L2)
{
@@ -1680,15 +1680,6 @@ static int mov_to_cr(int gp, int cr, str
{
printk("Unsupported guest paging levels\n");
/* need to take a clean path */
- domain_crash_synchronous();
- }
- }
- else
- {
- if ( !shadow_set_guest_paging_levels(v->domain,
- PAGING_L4) )
- {
- printk("Unsupported guest paging levels\n");
domain_crash_synchronous();
}
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/mm.c Fri Jun 23 15:33:25 2006 -0600
@@ -108,11 +108,20 @@
#include <public/memory.h>
#ifdef VERBOSE
-#define MEM_LOG(_f, _a...) \
- printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
+#define MEM_LOG(_f, _a...) \
+ printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
current->domain->domain_id , __LINE__ , ## _a )
#else
#define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+/*
+ * PTE updates can be done with ordinary writes except:
+ * 1. Debug builds get extra checking by using CMPXCHG[8B].
+ * 2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
+ */
+#if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
+#define PTE_UPDATE_WITH_CMPXCHG
#endif
/*
@@ -261,17 +270,19 @@ void share_xen_page_with_privileged_gues
#ifdef NDEBUG
/* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
-#define l3tab_needs_shadow(mfn) (mfn >= 0x100000)
+#define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
#else
/*
- * In debug builds we aggressively shadow PDPTs to exercise code paths.
+ * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
* We cannot safely shadow the idle page table, nor shadow-mode page tables
- * (detected by lack of an owning domain). Always shadow PDPTs above 4GB.
+ * (detected by lack of an owning domain). As required for correctness, we
+ * always shadow PDPTs above 4GB.
*/
#define l3tab_needs_shadow(mfn) \
- ((((mfn << PAGE_SHIFT) != __pa(idle_pg_table)) && \
- (page_get_owner(mfn_to_page(mfn)) != NULL)) || \
- (mfn >= 0x100000))
+ (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
+ (page_get_owner(mfn_to_page(mfn)) != NULL) && \
+ ((mfn) & 1)) || /* odd MFNs are shadowed */ \
+ ((mfn) >= 0x100000))
#endif
static l1_pgentry_t *fix_pae_highmem_pl1e;
@@ -296,6 +307,8 @@ static void __write_ptbase(unsigned long
if ( !l3tab_needs_shadow(mfn) )
{
write_cr3(mfn << PAGE_SHIFT);
+ /* Cache is no longer in use or valid (/after/ write to %cr3). */
+ cache->high_mfn = 0;
return;
}
@@ -1167,20 +1180,35 @@ static inline int update_l1e(l1_pgentry_
l1_pgentry_t ol1e,
l1_pgentry_t nl1e)
{
+#ifndef PTE_UPDATE_WITH_CMPXCHG
+ return !__copy_to_user(pl1e, &nl1e, sizeof(nl1e));
+#else
intpte_t o = l1e_get_intpte(ol1e);
intpte_t n = l1e_get_intpte(nl1e);
- if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
- unlikely(o != l1e_get_intpte(ol1e)) )
- {
- MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
- ": saw %" PRIpte,
- l1e_get_intpte(ol1e),
- l1e_get_intpte(nl1e),
- o);
- return 0;
- }
+ for ( ; ; )
+ {
+ if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
+ {
+ MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
+ ": saw %" PRIpte,
+ l1e_get_intpte(ol1e),
+ l1e_get_intpte(nl1e),
+ o);
+ return 0;
+ }
+
+ if ( o == l1e_get_intpte(ol1e) )
+ break;
+
+ /* Allowed to change in Accessed/Dirty flags only. */
+ BUG_ON((o ^ l1e_get_intpte(ol1e)) &
+ ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));
+ ol1e = l1e_from_intpte(o);
+ }
+
return 1;
+#endif
}
@@ -1228,17 +1256,24 @@ static int mod_l1_entry(l1_pgentry_t *pl
return 1;
}
-#define UPDATE_ENTRY(_t,_p,_o,_n) ({ \
- intpte_t __o = cmpxchg((intpte_t *)(_p), \
- _t ## e_get_intpte(_o), \
- _t ## e_get_intpte(_n)); \
- if ( __o != _t ## e_get_intpte(_o) ) \
- MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte \
- ": saw %" PRIpte "", \
- (_t ## e_get_intpte(_o)), \
- (_t ## e_get_intpte(_n)), \
- (__o)); \
- (__o == _t ## e_get_intpte(_o)); })
+#ifndef PTE_UPDATE_WITH_CMPXCHG
+#define UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; })
+#else
+#define UPDATE_ENTRY(_t,_p,_o,_n) ({ \
+ for ( ; ; ) \
+ { \
+ intpte_t __o = cmpxchg((intpte_t *)(_p), \
+ _t ## e_get_intpte(_o), \
+ _t ## e_get_intpte(_n)); \
+ if ( __o == _t ## e_get_intpte(_o) ) \
+ break; \
+ /* Allowed to change in Accessed/Dirty flags only. */ \
+ BUG_ON((__o ^ _t ## e_get_intpte(_o)) & \
+ ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); \
+ _o = _t ## e_from_intpte(__o); \
+ } \
+ 1; })
+#endif
/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
static int mod_l2_entry(l2_pgentry_t *pl2e,
@@ -2408,8 +2443,8 @@ static int create_grant_pte_mapping(
goto failed;
}
- if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) ||
- !update_l1e(va, ol1e, _nl1e) )
+ ol1e = *(l1_pgentry_t *)va;
+ if ( !update_l1e(va, ol1e, _nl1e) )
{
put_page_type(page);
rc = GNTST_general_error;
@@ -2486,7 +2521,7 @@ static int destroy_grant_pte_mapping(
}
/* Delete pagetable entry. */
- if ( unlikely(__put_user(0, (intpte_t *)va)))
+ if ( unlikely(!update_l1e((l1_pgentry_t *)va, ol1e, l1e_empty())) )
{
MEM_LOG("Cannot delete PTE entry at %p", va);
put_page_type(page);
@@ -2566,7 +2601,7 @@ static int destroy_grant_va_mapping(
}
/* Delete pagetable entry. */
- if ( unlikely(__put_user(0, &pl1e->l1)) )
+ if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty())) )
{
MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
return GNTST_general_error;
@@ -3020,6 +3055,20 @@ long arch_memory_op(int op, XEN_GUEST_HA
return 0;
}
+ case XENMEM_machphys_mapping:
+ {
+ struct xen_machphys_mapping mapping = {
+ .v_start = MACH2PHYS_VIRT_START,
+ .v_end = MACH2PHYS_VIRT_END,
+ .max_mfn = MACH2PHYS_NR_ENTRIES - 1
+ };
+
+ if ( copy_to_guest(arg, &mapping, 1) )
+ return -EFAULT;
+
+ return 0;
+ }
+
default:
return subarch_memory_op(op, arg);
}
@@ -3343,7 +3392,7 @@ static int ptwr_emulated_update(
addr &= ~(sizeof(paddr_t)-1);
if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
{
- propagate_page_fault(addr, 4); /* user mode, read fault */
+ propagate_page_fault(addr, 0); /* read fault */
return X86EMUL_PROPAGATE_FAULT;
}
/* Mask out bits provided by caller. */
@@ -3358,6 +3407,7 @@ static int ptwr_emulated_update(
old |= full;
}
+#if 0 /* XXX KAF: I don't think this can happen. */
/*
* We must not emulate an update to a PTE that is temporarily marked
* writable by the batched ptwr logic, else we can corrupt page refcnts!
@@ -3368,6 +3418,12 @@ static int ptwr_emulated_update(
if ( ((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
(l1_linear_offset(l1va) == l1_linear_offset(addr)) )
ptwr_flush(d, PTWR_PT_INACTIVE);
+#else
+ BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
+ (l1_linear_offset(l1va) == l1_linear_offset(addr)));
+ BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
+ (l1_linear_offset(l1va) == l1_linear_offset(addr)));
+#endif
/* Read the PTE that maps the page being updated. */
if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
@@ -3409,8 +3465,9 @@ static int ptwr_emulated_update(
}
else
{
- ol1e = *pl1e;
- *pl1e = nl1e;
+ ol1e = *pl1e;
+ if ( !update_l1e(pl1e, ol1e, nl1e) )
+ BUG();
}
unmap_domain_page(pl1e);
@@ -3475,16 +3532,18 @@ int ptwr_do_page_fault(struct domain *d,
unsigned long l2_idx;
struct x86_emulate_ctxt emul_ctxt;
- if ( unlikely(shadow_mode_enabled(d)) )
- return 0;
+ ASSERT(!shadow_mode_enabled(d));
/*
* Attempt to read the PTE that maps the VA being accessed. By checking for
* PDE validity in the L2 we avoid many expensive fixups in __get_user().
+ * NB. The L2 entry cannot be detached due to existing ptwr work: the
+ * caller already checked that.
*/
- if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) &
- _PAGE_PRESENT) ||
- __copy_from_user(&pte,&linear_pg_table[l1_linear_offset(addr)],
+ pl2e = &__linear_l2_table[l2_linear_offset(addr)];
+ if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) ||
+ !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
+ __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
sizeof(pte)) )
{
return 0;
@@ -3557,21 +3616,31 @@ int ptwr_do_page_fault(struct domain *d,
}
/*
- * If this is a multi-processor guest then ensure that the page is hooked
- * into at most one L2 table, which must be the one running on this VCPU.
+ * Multi-processor guest? Then ensure that the page table is hooked into
+ * at most one L2, and also ensure that there is only one mapping of the
+ * page table itself (or there can be conflicting writable mappings from
+ * other VCPUs).
*/
- if ( (d->vcpu[0]->next_in_list != NULL) &&
- ((page->u.inuse.type_info & PGT_count_mask) !=
- (!!(page->u.inuse.type_info & PGT_pinned) +
- (which == PTWR_PT_ACTIVE))) )
- {
- /* Could be conflicting writable mappings from other VCPUs. */
- cleanup_writable_pagetable(d);
- goto emulate;
+ if ( d->vcpu[0]->next_in_list != NULL )
+ {
+ if ( /* Hooked into at most one L2 table (which this VCPU maps)? */
+ ((page->u.inuse.type_info & PGT_count_mask) !=
+ (!!(page->u.inuse.type_info & PGT_pinned) +
+ (which == PTWR_PT_ACTIVE))) ||
+ /* PTEs are mapped read-only in only one place? */
+ ((page->count_info & PGC_count_mask) !=
+ (!!(page->count_info & PGC_allocated) + /* alloc count */
+ (page->u.inuse.type_info & PGT_count_mask) + /* type count */
+ 1)) ) /* map count */
+ {
+ /* Could be conflicting writable mappings from other VCPUs. */
+ cleanup_writable_pagetable(d);
+ goto emulate;
+ }
}
/*
- * We only allow one ACTIVE and one INACTIVE p.t. to be updated at at
+ * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
* time. If there is already one, we must flush it out.
*/
if ( d->arch.ptwr[which].l1va )
@@ -3592,18 +3661,16 @@ int ptwr_do_page_fault(struct domain *d,
"pfn %lx\n", PTWR_PRINT_WHICH, addr,
l2_idx << L2_PAGETABLE_SHIFT, pfn);
- d->arch.ptwr[which].l1va = addr | 1;
- d->arch.ptwr[which].l2_idx = l2_idx;
- d->arch.ptwr[which].vcpu = current;
-
-#ifdef PERF_ARRAYS
- d->arch.ptwr[which].eip = regs->eip;
-#endif
-
/* For safety, disconnect the L1 p.t. page from current space. */
if ( which == PTWR_PT_ACTIVE )
{
- l2e_remove_flags(*pl2e, _PAGE_PRESENT);
+ l2e_remove_flags(l2e, _PAGE_PRESENT);
+ if ( unlikely(__copy_to_user(pl2e, &l2e, sizeof(l2e))) )
+ {
+ MEM_LOG("ptwr: Could not unhook l2e at %p", pl2e);
+ domain_crash(d);
+ return 0;
+ }
flush_tlb_mask(d->domain_dirty_cpumask);
}
@@ -3617,14 +3684,24 @@ int ptwr_do_page_fault(struct domain *d,
if ( unlikely(__put_user(pte.l1,
&linear_pg_table[l1_linear_offset(addr)].l1)) )
{
- MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
+ MEM_LOG("ptwr: Could not update pte at %p",
&linear_pg_table[l1_linear_offset(addr)]);
- /* Toss the writable pagetable state and crash. */
- d->arch.ptwr[which].l1va = 0;
domain_crash(d);
return 0;
}
+ /*
+ * Now record the writable pagetable state *after* any accesses that can
+ * cause a recursive page fault (i.e., those via the *_user() accessors).
+ * Otherwise we can enter ptwr_flush() with half-done ptwr state.
+ */
+ d->arch.ptwr[which].l1va = addr | 1;
+ d->arch.ptwr[which].l2_idx = l2_idx;
+ d->arch.ptwr[which].vcpu = current;
+#ifdef PERF_ARRAYS
+ d->arch.ptwr[which].eip = regs->eip;
+#endif
+
return EXCRET_fault_fixed;
emulate:
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/setup.c Fri Jun 23 15:33:25 2006 -0600
@@ -396,11 +396,13 @@ void __init __start_xen(multiboot_info_t
BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
BUILD_BUG_ON(sizeof(vcpu_info_t) != 64);
- /* __foo are defined in public headers. Check they match internal defs. */
+ /* Check definitions in public headers match internal defs. */
BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
#ifdef HYPERVISOR_VIRT_END
BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
#endif
+ BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
+ BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END);
init_frametable();
@@ -596,8 +598,7 @@ void __init __start_xen(multiboot_info_t
init_trace_bufs();
- /* Give up the VGA console if DOM0 is configured to grab it. */
- console_endboot(cmdline && strstr(cmdline, "tty0"));
+ console_endboot();
/* Hide UART from DOM0 if we're using it */
serial_endboot();
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/shadow.c Fri Jun 23 15:33:25 2006 -0600
@@ -222,6 +222,7 @@ alloc_shadow_page(struct domain *d,
unsigned long smfn, real_gpfn;
int pin = 0;
void *l1, *lp;
+ u64 index = 0;
// Currently, we only keep pre-zero'ed pages around for use as L1's...
// This will change. Soon.
@@ -354,9 +355,19 @@ alloc_shadow_page(struct domain *d,
if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
pin = 1;
#endif
+
+#if CONFIG_PAGING_LEVELS == 3 & defined ( GUEST_32PAE )
+ /*
+ * We use PGT_l4_shadow for 3-level (PAE) paging guests on PAE
+ */
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+ pin = 1;
+#endif
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+ index = get_cr3_idxval(current);
break;
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
case PGT_fl1_shadow:
perfc_incr(shadow_l1_pages);
d->arch.shadow_page_count++;
@@ -393,7 +404,7 @@ alloc_shadow_page(struct domain *d,
//
ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
- set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
+ set_shadow_status(d, gpfn, gmfn, smfn, psh_type, index);
if ( pin )
shadow_pin(smfn);
@@ -1324,7 +1335,7 @@ increase_writable_pte_prediction(struct
prediction = (prediction & PGT_mfn_mask) | score;
//printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction,
create);
- set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction,
PGT_writable_pred);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction,
PGT_writable_pred, 0);
if ( create )
perfc_incr(writable_pte_predictions);
@@ -1345,10 +1356,10 @@ decrease_writable_pte_prediction(struct
//printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction,
score);
if ( score )
- set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction,
PGT_writable_pred);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction,
PGT_writable_pred, 0);
else
{
- delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+ delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred,
0);
perfc_decr(writable_pte_predictions);
}
}
@@ -1385,7 +1396,7 @@ static u32 remove_all_write_access_in_pt
int is_l1_shadow =
((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
PGT_l1_shadow);
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
is_l1_shadow |=
((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
PGT_fl1_shadow);
@@ -1494,7 +1505,7 @@ static int remove_all_write_access(
while ( a && a->gpfn_and_flags )
{
if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
|| (a->gpfn_and_flags & PGT_type_mask) == PGT_fl1_shadow
#endif
)
@@ -1538,8 +1549,8 @@ static void resync_pae_guest_l3(struct d
continue;
idx = get_cr3_idxval(v);
- smfn = __shadow_status(
- d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn),
PGT_l4_shadow);
+
+ smfn = __shadow_status(d, entry->gpfn, PGT_l4_shadow);
if ( !smfn )
continue;
@@ -1706,7 +1717,7 @@ static int resync_all(struct domain *d,
{
int error;
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
unsigned long gpfn;
gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT;
@@ -2420,17 +2431,6 @@ static void shadow_update_pagetables(str
v->arch.guest_vtable = map_domain_page_global(gmfn);
}
-#if CONFIG_PAGING_LEVELS >= 3
- /*
- * Handle 32-bit PAE enabled guest
- */
- if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- u32 index = get_cr3_idxval(v);
- gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn;
- }
-#endif
-
/*
* arch.shadow_table
*/
@@ -2443,6 +2443,23 @@ static void shadow_update_pagetables(str
if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) )
smfn = shadow_l3_table(v, gpfn, gmfn);
}
+ else
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3 & defined ( GUEST_32PAE )
+ /*
+ * We use PGT_l4_shadow for 3-level (PAE) paging guests on PAE
+ */
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+ {
+ if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) )
+ smfn = shadow_l3_table(v, gpfn, gmfn);
+ else
+ {
+ update_top_level_shadow(v, smfn);
+ need_sync = 1;
+ }
+ }
else
#endif
if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
@@ -3093,6 +3110,36 @@ static inline unsigned long init_bl2(
return smfn;
}
+
+static inline unsigned long init_l3(
+ struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
+{
+ unsigned long smfn;
+ l4_pgentry_t *spl4e;
+ unsigned long index;
+
+ if ( unlikely(!(smfn = alloc_shadow_page(v->domain, gpfn, gmfn,
PGT_l4_shadow))) )
+ {
+ printk("Couldn't alloc an L4 shadow for pfn= %lx mfn= %lx\n", gpfn,
gmfn);
+ BUG(); /* XXX Deal gracefully wiht failure. */
+ }
+
+ /* Map the self entry, L4&L3 share the same page */
+ spl4e = (l4_pgentry_t *)map_domain_page(smfn);
+
+ /*
+ * Shadow L4's pfn_info->tlbflush_timestamp
+ * should also save its own index.
+ */
+
+ index = get_cr3_idxval(v);
+ frame_table[smfn].tlbflush_timestamp = index;
+
+ memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
+ spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
+ unmap_domain_page(spl4e);
+ return smfn;
+}
#endif
#if CONFIG_PAGING_LEVELS == 3
@@ -3111,6 +3158,12 @@ static unsigned long shadow_l3_table(
d->arch.ops->guest_paging_levels == PAGING_L2 )
{
return init_bl2(d, gpfn, gmfn);
+ }
+
+ if ( SH_GUEST_32PAE &&
+ d->arch.ops->guest_paging_levels == PAGING_L3 )
+ {
+ return init_l3(v, gpfn, gmfn);
}
if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
@@ -3223,6 +3276,11 @@ static unsigned long shadow_l4_table(
return init_bl2(d, gpfn, gmfn);
}
+ if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
+ {
+ return init_l3(v, gpfn, gmfn);
+ }
+
if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
{
printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn,
gmfn);
@@ -3230,24 +3288,6 @@ static unsigned long shadow_l4_table(
}
spl4e = (l4_pgentry_t *)map_domain_page(smfn);
-
- /* For 32-bit PAE guest on 64-bit host */
- if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- unsigned long index;
- /*
- * Shadow L4's pfn_info->tlbflush_timestamp
- * should also save it's own index.
- */
- index = get_cr3_idxval(v);
- frame_table[smfn].tlbflush_timestamp = index;
-
- memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
- /* Map the self entry */
- spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
- unmap_domain_page(spl4e);
- return smfn;
- }
/* Install hypervisor and 4x linear p.t. mapings. */
if ( (PGT_base_page_table == PGT_l4_page_table) &&
@@ -3378,7 +3418,7 @@ validate_bl2e_change(
* This shadow_mark_va_out_of_sync() is for 2M page shadow
*/
static void shadow_mark_va_out_of_sync_2mp(
- struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long
writable_pl1e)
+ struct vcpu *v, unsigned long gpfn, unsigned long mfn, paddr_t writable_pl1e)
{
struct out_of_sync_entry *entry =
shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
@@ -3647,6 +3687,7 @@ static inline int l2e_rw_fault(
}
unmap_domain_page(l1_p);
+ *gl2e_p = gl2e;
return 1;
}
@@ -3720,7 +3761,7 @@ static inline int guest_page_fault(
ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 );
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
if ( (error_code & (ERROR_I | ERROR_P)) == (ERROR_I | ERROR_P) )
return 1;
#endif
@@ -4056,7 +4097,7 @@ struct shadow_ops MODE_32_2_HANDLER = {
};
#endif
-#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) ) || \
+#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) && !defined
(GUEST_32PAE) ) || \
( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) )
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/shadow32.c Fri Jun 23 15:33:25 2006 -0600
@@ -306,7 +306,7 @@ alloc_shadow_page(struct domain *d,
//
ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
- set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
+ set_shadow_status(d, gpfn, gmfn, smfn, psh_type, 0);
if ( pin )
shadow_pin(smfn);
@@ -395,7 +395,7 @@ void free_shadow_page(unsigned long smfn
ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
- delete_shadow_status(d, gpfn, gmfn, type);
+ delete_shadow_status(d, gpfn, gmfn, type, 0);
switch ( type )
{
@@ -2319,7 +2319,7 @@ increase_writable_pte_prediction(struct
prediction = (prediction & PGT_mfn_mask) | score;
//printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction,
create);
- set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction,
PGT_writable_pred);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction,
PGT_writable_pred, 0);
if ( create )
perfc_incr(writable_pte_predictions);
@@ -2340,10 +2340,10 @@ decrease_writable_pte_prediction(struct
//printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction,
score);
if ( score )
- set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction,
PGT_writable_pred);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction,
PGT_writable_pred, 0);
else
{
- delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+ delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred,
0);
perfc_decr(writable_pte_predictions);
}
}
@@ -2381,7 +2381,7 @@ free_writable_pte_predictions(struct dom
* keep an accurate count of writable_pte_predictions to keep it
* happy.
*/
- delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+ delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0);
perfc_decr(writable_pte_predictions);
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow_guest32pae.c
--- a/xen/arch/x86/shadow_guest32pae.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/shadow_guest32pae.c Fri Jun 23 15:33:25 2006 -0600
@@ -1,5 +1,4 @@
#define GUEST_32PAE
-#if defined (__x86_64__)
#include "shadow.c"
struct shadow_ops MODE_64_PAE_HANDLER = {
@@ -15,4 +14,3 @@ struct shadow_ops MODE_64_PAE_HANDLER =
.gva_to_gpa = gva_to_gpa_64,
};
-#endif
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/shadow_public.c Fri Jun 23 15:33:25 2006 -0600
@@ -123,8 +123,19 @@ int shadow_set_guest_paging_levels(struc
#endif
#if CONFIG_PAGING_LEVELS == 3
case 3:
- if ( d->arch.ops != &MODE_64_3_HANDLER )
- d->arch.ops = &MODE_64_3_HANDLER;
+ if ( d->arch.ops == NULL ||
+ shadow_mode_log_dirty(d) )
+ {
+ if ( d->arch.ops != &MODE_64_3_HANDLER )
+ d->arch.ops = &MODE_64_3_HANDLER;
+ }
+ else
+ {
+ if ( d->arch.ops == &MODE_64_2_HANDLER )
+ free_shadow_pages(d);
+ if ( d->arch.ops != &MODE_64_PAE_HANDLER )
+ d->arch.ops = &MODE_64_PAE_HANDLER;
+ }
shadow_unlock(d);
return 1;
#endif
@@ -268,10 +279,8 @@ free_shadow_tables(struct domain *d, uns
put_shadow_ref(entry_get_pfn(ple[i]));
if (d->arch.ops->guest_paging_levels == PAGING_L3)
{
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L4 )
-#elif CONFIG_PAGING_LEVELS == 3
- if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L3 )
#endif
break;
}
@@ -710,6 +719,7 @@ void free_shadow_page(unsigned long smfn
struct domain *d = page_get_owner(mfn_to_page(gmfn));
unsigned long gpfn = mfn_to_gmfn(d, gmfn);
unsigned long type = page->u.inuse.type_info & PGT_type_mask;
+ u64 index = 0;
SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
@@ -722,12 +732,16 @@ void free_shadow_page(unsigned long smfn
if ( !mfn )
gpfn |= (1UL << 63);
}
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- if ( type == PGT_l4_shadow )
- gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_pae_idx_shift) | gpfn;
-#endif
-
- delete_shadow_status(d, gpfn, gmfn, type);
+ {
+ if ( type == PGT_l4_shadow )
+ index = page->tlbflush_timestamp;
+ }
+#endif
+
+ delete_shadow_status(d, gpfn, gmfn, type, index);
switch ( type )
{
@@ -835,7 +849,7 @@ free_writable_pte_predictions(struct dom
while ( count )
{
count--;
- delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+ delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0);
}
xfree(gpfn_list);
@@ -1050,8 +1064,8 @@ void __shadow_mode_disable(struct domain
{
if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
{
- printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
- __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
+ printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%"PRIx64"\n",
+ __FILE__, i, (u64)d->arch.shadow_ht[i].gpfn_and_flags);
BUG();
}
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/time.c Fri Jun 23 15:33:25 2006 -0600
@@ -699,7 +699,7 @@ void update_domain_wallclock_time(struct
{
spin_lock(&wc_lock);
version_update_begin(&d->shared_info->wc_version);
- d->shared_info->wc_sec = wc_sec;
+ d->shared_info->wc_sec = wc_sec + d->time_offset_seconds;
d->shared_info->wc_nsec = wc_nsec;
version_update_end(&d->shared_info->wc_version);
spin_unlock(&wc_lock);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/traps.c Fri Jun 23 15:33:25 2006 -0600
@@ -276,6 +276,42 @@ void show_stack(struct cpu_user_regs *re
show_trace(regs);
}
+void show_stack_overflow(unsigned long esp)
+{
+#ifdef MEMORY_GUARD
+ unsigned long esp_top = get_stack_bottom() & PAGE_MASK;
+ unsigned long *stack, addr;
+
+ /* Trigger overflow trace if %esp is within 100 bytes of the guard page. */
+ if ( ((esp - esp_top) > 100) && ((esp_top - esp) > 100) )
+ return;
+
+ if ( esp < esp_top )
+ esp = esp_top;
+
+ printk("Xen stack overflow:\n ");
+
+ stack = (unsigned long *)esp;
+ while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
+ {
+ addr = *stack++;
+ if ( is_kernel_text(addr) )
+ {
+ printk("%p: [<%p>]", stack, _p(addr));
+ print_symbol(" %s\n ", addr);
+ }
+ }
+
+ printk("\n");
+#endif
+}
+
+void show_execution_state(struct cpu_user_regs *regs)
+{
+ show_registers(regs);
+ show_stack(regs);
+}
+
/*
* This is called for faults at very unexpected times (e.g., when interrupts
* are disabled). In such situations we can't do much that is safe. We try to
@@ -297,7 +333,7 @@ asmlinkage void fatal_trap(int trapnr, s
watchdog_disable();
console_start_sync();
- show_registers(regs);
+ show_execution_state(regs);
if ( trapnr == TRAP_page_fault )
{
@@ -360,7 +396,7 @@ static inline int do_trap(int trapnr, ch
DEBUGGER_trap_fatal(trapnr, regs);
- show_registers(regs);
+ show_execution_state(regs);
panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
"[error_code=%04x]\n",
smp_processor_id(), trapnr, str, regs->error_code);
@@ -451,8 +487,23 @@ asmlinkage int do_invalid_op(struct cpu_
if ( unlikely(!guest_mode(regs)) )
{
+ char sig[5];
+ /* Signature (ud2; .ascii "dbg") indicates dump state and continue. */
+ if ( (__copy_from_user(sig, (char *)regs->eip, sizeof(sig)) == 0) &&
+ (memcmp(sig, "\xf\xb""dbg", sizeof(sig)) == 0) )
+ {
+ show_execution_state(regs);
+ regs->eip += sizeof(sig);
+ return EXCRET_fault_fixed;
+ }
+ printk("%02x %02x %02x %02x %02x\n",
+ (unsigned char)sig[0],
+ (unsigned char)sig[1],
+ (unsigned char)sig[2],
+ (unsigned char)sig[3],
+ (unsigned char)sig[4]);
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
- show_registers(regs);
+ show_execution_state(regs);
panic("CPU%d FATAL TRAP: vector = %d (invalid opcode)\n",
smp_processor_id(), TRAP_invalid_op);
}
@@ -481,7 +532,7 @@ asmlinkage int do_int3(struct cpu_user_r
if ( !guest_mode(regs) )
{
DEBUGGER_trap_fatal(TRAP_int3, regs);
- show_registers(regs);
+ show_execution_state(regs);
panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
}
@@ -511,9 +562,9 @@ void propagate_page_fault(unsigned long
v->vcpu_info->arch.cr2 = addr;
/* Re-set error_code.user flag appropriately for the guest. */
- error_code &= ~4;
+ error_code &= ~PGERR_user_mode;
if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
- error_code |= 4;
+ error_code |= PGERR_user_mode;
ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
@@ -547,6 +598,7 @@ static int handle_gdt_ldt_mapping_fault(
{
/* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
LOCK_BIGLOCK(d);
+ cleanup_writable_pagetable(d);
ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
UNLOCK_BIGLOCK(d);
@@ -578,6 +630,98 @@ static int handle_gdt_ldt_mapping_fault(
(((va) >= HYPERVISOR_VIRT_START))
#endif
+static int __spurious_page_fault(
+ unsigned long addr, struct cpu_user_regs *regs)
+{
+ unsigned long mfn, cr3 = read_cr3();
+#if CONFIG_PAGING_LEVELS >= 4
+ l4_pgentry_t l4e, *l4t;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+ l3_pgentry_t l3e, *l3t;
+#endif
+ l2_pgentry_t l2e, *l2t;
+ l1_pgentry_t l1e, *l1t;
+ unsigned int required_flags, disallowed_flags;
+
+ /* Reserved bit violations are never spurious faults. */
+ if ( regs->error_code & PGERR_reserved_bit )
+ return 0;
+
+ required_flags = _PAGE_PRESENT;
+ if ( regs->error_code & PGERR_write_access )
+ required_flags |= _PAGE_RW;
+ if ( regs->error_code & PGERR_user_mode )
+ required_flags |= _PAGE_USER;
+
+ disallowed_flags = 0;
+ if ( regs->error_code & PGERR_instr_fetch )
+ disallowed_flags |= _PAGE_NX;
+
+ mfn = cr3 >> PAGE_SHIFT;
+
+#if CONFIG_PAGING_LEVELS >= 4
+ l4t = map_domain_page(mfn);
+ l4e = l4t[l4_table_offset(addr)];
+ mfn = l4e_get_pfn(l4e);
+ unmap_domain_page(l4t);
+ if ( !(l4e_get_flags(l4e) & required_flags) ||
+ (l4e_get_flags(l4e) & disallowed_flags) )
+ return 0;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+ l3t = map_domain_page(mfn);
+#ifdef CONFIG_X86_PAE
+ l3t += (cr3 & 0xFE0UL) >> 3;
+#endif
+ l3e = l3t[l3_table_offset(addr)];
+ mfn = l3e_get_pfn(l3e);
+ unmap_domain_page(l3t);
+#ifdef CONFIG_X86_PAE
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+ return 0;
+#else
+ if ( !(l3e_get_flags(l3e) & required_flags) ||
+ (l3e_get_flags(l3e) & disallowed_flags) )
+ return 0;
+#endif
+#endif
+
+ l2t = map_domain_page(mfn);
+ l2e = l2t[l2_table_offset(addr)];
+ mfn = l2e_get_pfn(l2e);
+ unmap_domain_page(l2t);
+ if ( !(l2e_get_flags(l2e) & required_flags) ||
+ (l2e_get_flags(l2e) & disallowed_flags) )
+ return 0;
+ if ( l2e_get_flags(l2e) & _PAGE_PSE )
+ return 1;
+
+ l1t = map_domain_page(mfn);
+ l1e = l1t[l1_table_offset(addr)];
+ mfn = l1e_get_pfn(l1e);
+ unmap_domain_page(l1t);
+ if ( !(l1e_get_flags(l1e) & required_flags) ||
+ (l1e_get_flags(l1e) & disallowed_flags) )
+ return 0;
+ return 1;
+}
+
+static int spurious_page_fault(
+ unsigned long addr, struct cpu_user_regs *regs)
+{
+ struct domain *d = current->domain;
+ int is_spurious;
+
+ LOCK_BIGLOCK(d);
+ cleanup_writable_pagetable(d);
+ is_spurious = __spurious_page_fault(addr, regs);
+ UNLOCK_BIGLOCK(d);
+
+ return is_spurious;
+}
+
static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
{
struct vcpu *v = current;
@@ -590,12 +734,17 @@ static int fixup_page_fault(unsigned lon
if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
return handle_gdt_ldt_mapping_fault(
addr - GDT_LDT_VIRT_START, regs);
- }
- else if ( unlikely(shadow_mode_enabled(d)) )
- {
+ /*
+ * Do not propagate spurious faults in the hypervisor area to the
+ * guest. It cannot fix them up.
+ */
+ return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
+ }
+
+ if ( unlikely(shadow_mode_enabled(d)) )
return shadow_fault(addr, regs);
- }
- else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
+
+ if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
{
LOCK_BIGLOCK(d);
if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
@@ -607,9 +756,14 @@ static int fixup_page_fault(unsigned lon
return EXCRET_fault_fixed;
}
+ /*
+ * Note it is *not* safe to check PGERR_page_present here. It can be
+ * clear, due to unhooked page table, when we would otherwise expect
+ * it to be set. We have an aversion to trusting that flag in Xen, and
+ * guests ought to be leery too.
+ */
if ( guest_kernel_mode(v, regs) &&
- /* Protection violation on write? No reserved-bit violation? */
- ((regs->error_code & 0xb) == 0x3) &&
+ (regs->error_code & PGERR_write_access) &&
ptwr_do_page_fault(d, addr, regs) )
{
UNLOCK_BIGLOCK(d);
@@ -619,46 +773,6 @@ static int fixup_page_fault(unsigned lon
}
return 0;
-}
-
-static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs)
-{
- struct vcpu *v = current;
- struct domain *d = v->domain;
- int rc;
-
- /*
- * The only possible reason for a spurious page fault not to be picked
- * up already is that a page directory was unhooked by writable page table
- * logic and then reattached before the faulting VCPU could detect it.
- */
- if ( is_idle_domain(d) || /* no ptwr in idle domain */
- IN_HYPERVISOR_RANGE(addr) || /* no ptwr on hypervisor addrs */
- shadow_mode_enabled(d) || /* no ptwr logic in shadow mode */
- ((regs->error_code & 0x1d) != 0) ) /* simple not-present fault? */
- return 0;
-
- LOCK_BIGLOCK(d);
-
- /*
- * The page directory could have been detached again while we weren't
- * holding the per-domain lock. Detect that and fix up if it's the case.
- */
- if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
- unlikely(l2_linear_offset(addr) ==
- d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
- {
- ptwr_flush(d, PTWR_PT_ACTIVE);
- rc = 1;
- }
- else
- {
- /* Okay, walk the page tables. Only check for not-present faults.*/
- rc = __spurious_page_fault(addr);
- }
-
- UNLOCK_BIGLOCK(d);
- return rc;
}
/*
@@ -703,7 +817,7 @@ asmlinkage int do_page_fault(struct cpu_
DEBUGGER_trap_fatal(TRAP_page_fault, regs);
- show_registers(regs);
+ show_execution_state(regs);
show_page_walk(addr);
panic("CPU%d FATAL PAGE FAULT\n"
"[error_code=%04x]\n"
@@ -784,8 +898,6 @@ static inline int admin_io_okay(
(admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
/* Propagate a fault back to the guest kernel. */
-#define USER_READ_FAULT 4 /* user mode, read fault */
-#define USER_WRITE_FAULT 6 /* user mode, write fault */
#define PAGE_FAULT(_faultaddr, _errcode) \
({ propagate_page_fault(_faultaddr, _errcode); \
return EXCRET_fault_fixed; \
@@ -795,7 +907,7 @@ static inline int admin_io_okay(
#define insn_fetch(_type, _size, _ptr) \
({ unsigned long _x; \
if ( get_user(_x, (_type *)eip) ) \
- PAGE_FAULT(eip, USER_READ_FAULT); \
+ PAGE_FAULT(eip, 0); /* read fault */ \
eip += _size; (_type)_x; })
static int emulate_privileged_op(struct cpu_user_regs *regs)
@@ -864,17 +976,17 @@ static int emulate_privileged_op(struct
case 1:
data = (u8)inb_user((u16)regs->edx, v, regs);
if ( put_user((u8)data, (u8 *)regs->edi) )
- PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+ PAGE_FAULT(regs->edi, PGERR_write_access);
break;
case 2:
data = (u16)inw_user((u16)regs->edx, v, regs);
if ( put_user((u16)data, (u16 *)regs->edi) )
- PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+ PAGE_FAULT(regs->edi, PGERR_write_access);
break;
case 4:
data = (u32)inl_user((u16)regs->edx, v, regs);
if ( put_user((u32)data, (u32 *)regs->edi) )
- PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+ PAGE_FAULT(regs->edi, PGERR_write_access);
break;
}
regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
@@ -889,17 +1001,17 @@ static int emulate_privileged_op(struct
{
case 1:
if ( get_user(data, (u8 *)regs->esi) )
- PAGE_FAULT(regs->esi, USER_READ_FAULT);
+ PAGE_FAULT(regs->esi, 0); /* read fault */
outb_user((u8)data, (u16)regs->edx, v, regs);
break;
case 2:
if ( get_user(data, (u16 *)regs->esi) )
- PAGE_FAULT(regs->esi, USER_READ_FAULT);
+ PAGE_FAULT(regs->esi, 0); /* read fault */
outw_user((u16)data, (u16)regs->edx, v, regs);
break;
case 4:
if ( get_user(data, (u32 *)regs->esi) )
- PAGE_FAULT(regs->esi, USER_READ_FAULT);
+ PAGE_FAULT(regs->esi, 0); /* read fault */
outl_user((u32)data, (u16)regs->edx, v, regs);
break;
}
@@ -1082,7 +1194,7 @@ static int emulate_privileged_op(struct
v->arch.guest_context.ctrlreg[2] = *reg;
v->vcpu_info->arch.cr2 = *reg;
break;
-
+
case 3: /* Write CR3 */
LOCK_BIGLOCK(v->domain);
cleanup_writable_pagetable(v->domain);
@@ -1270,7 +1382,7 @@ asmlinkage int do_general_protection(str
DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
hardware_gp:
- show_registers(regs);
+ show_execution_state(regs);
panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
smp_processor_id(), regs->error_code);
return 0;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_32/seg_fixup.c
--- a/xen/arch/x86/x86_32/seg_fixup.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/x86_32/seg_fixup.c Fri Jun 23 15:33:25 2006 -0600
@@ -464,7 +464,7 @@ int gpf_emulate_4gb(struct cpu_user_regs
return 0;
page_fault:
- propagate_page_fault((unsigned long)pb, 4);
+ propagate_page_fault((unsigned long)pb, 0); /* read fault */
return EXCRET_fault_fixed;
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/x86_32/traps.c Fri Jun 23 15:33:25 2006 -0600
@@ -68,13 +68,11 @@ void show_registers(struct cpu_user_regs
"ss: %04x cs: %04x\n",
fault_regs.ds, fault_regs.es, fault_regs.fs,
fault_regs.gs, fault_regs.ss, fault_regs.cs);
-
- show_stack(regs);
}
void show_page_walk(unsigned long addr)
{
- unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
+ unsigned long pfn, mfn, cr3 = read_cr3();
#ifdef CONFIG_X86_PAE
l3_pgentry_t l3e, *l3t;
#endif
@@ -83,8 +81,11 @@ void show_page_walk(unsigned long addr)
printk("Pagetable walk from %08lx:\n", addr);
+ mfn = cr3 >> PAGE_SHIFT;
+
#ifdef CONFIG_X86_PAE
- l3t = map_domain_page(mfn);
+ l3t = map_domain_page(mfn);
+ l3t += (cr3 & 0xFE0UL) >> 3;
l3e = l3t[l3_table_offset(addr)];
mfn = l3e_get_pfn(l3e);
pfn = get_gpfn_from_mfn(mfn);
@@ -111,40 +112,6 @@ void show_page_walk(unsigned long addr)
pfn = get_gpfn_from_mfn(mfn);
printk(" L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
unmap_domain_page(l1t);
-}
-
-int __spurious_page_fault(unsigned long addr)
-{
- unsigned long mfn = read_cr3() >> PAGE_SHIFT;
-#ifdef CONFIG_X86_PAE
- l3_pgentry_t l3e, *l3t;
-#endif
- l2_pgentry_t l2e, *l2t;
- l1_pgentry_t l1e, *l1t;
-
-#ifdef CONFIG_X86_PAE
- l3t = map_domain_page(mfn);
- l3e = l3t[l3_table_offset(addr)];
- mfn = l3e_get_pfn(l3e);
- unmap_domain_page(l3t);
- if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
- return 0;
-#endif
-
- l2t = map_domain_page(mfn);
- l2e = l2t[l2_table_offset(addr)];
- mfn = l2e_get_pfn(l2e);
- unmap_domain_page(l2t);
- if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- return 0;
- if ( l2e_get_flags(l2e) & _PAGE_PSE )
- return 1;
-
- l1t = map_domain_page(mfn);
- l1e = l1t[l1_table_offset(addr)];
- mfn = l1e_get_pfn(l1e);
- unmap_domain_page(l1t);
- return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
}
#define DOUBLEFAULT_STACK_SIZE 1024
@@ -173,6 +140,7 @@ asmlinkage void do_double_fault(void)
tss->esi, tss->edi, tss->ebp, tss->esp);
printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
+ show_stack_overflow(tss->esp);
printk("************************************\n");
printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
printk("System needs manual reset.\n");
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/x86_64/traps.c Fri Jun 23 15:33:25 2006 -0600
@@ -68,8 +68,6 @@ void show_registers(struct cpu_user_regs
"ss: %04x cs: %04x\n",
fault_regs.ds, fault_regs.es, fault_regs.fs,
fault_regs.gs, fault_regs.ss, fault_regs.cs);
-
- show_stack(regs);
}
void show_page_walk(unsigned long addr)
@@ -115,40 +113,6 @@ void show_page_walk(unsigned long addr)
printk(" L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
}
-int __spurious_page_fault(unsigned long addr)
-{
- unsigned long mfn = read_cr3() >> PAGE_SHIFT;
- l4_pgentry_t l4e, *l4t;
- l3_pgentry_t l3e, *l3t;
- l2_pgentry_t l2e, *l2t;
- l1_pgentry_t l1e, *l1t;
-
- l4t = mfn_to_virt(mfn);
- l4e = l4t[l4_table_offset(addr)];
- mfn = l4e_get_pfn(l4e);
- if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
- return 0;
-
- l3t = mfn_to_virt(mfn);
- l3e = l3t[l3_table_offset(addr)];
- mfn = l3e_get_pfn(l3e);
- if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
- return 0;
-
- l2t = mfn_to_virt(mfn);
- l2e = l2t[l2_table_offset(addr)];
- mfn = l2e_get_pfn(l2e);
- if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- return 0;
- if ( l2e_get_flags(l2e) & _PAGE_PSE )
- return 1;
-
- l1t = mfn_to_virt(mfn);
- l1e = l1t[l1_table_offset(addr)];
- mfn = l1e_get_pfn(l1e);
- return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
-}
-
asmlinkage void double_fault(void);
asmlinkage void do_double_fault(struct cpu_user_regs *regs)
{
@@ -159,6 +123,7 @@ asmlinkage void do_double_fault(struct c
/* Find information saved during fault and dump it to the console. */
printk("************************************\n");
show_registers(regs);
+ show_stack_overflow(regs->rsp);
printk("************************************\n");
printk("CPU%d DOUBLE FAULT -- system shutdown\n", smp_processor_id());
printk("System needs manual reset.\n");
diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/arch/x86/x86_emulate.c Fri Jun 23 15:33:25 2006 -0600
@@ -1146,7 +1146,7 @@ x86_emulate_read_std(
*val = 0;
if ( copy_from_user((void *)val, (void *)addr, bytes) )
{
- propagate_page_fault(addr, 4); /* user mode, read fault */
+ propagate_page_fault(addr, 0); /* read fault */
return X86EMUL_PROPAGATE_FAULT;
}
return X86EMUL_CONTINUE;
@@ -1161,7 +1161,7 @@ x86_emulate_write_std(
{
if ( copy_to_user((void *)addr, (void *)&val, bytes) )
{
- propagate_page_fault(addr, 6); /* user mode, write fault */
+ propagate_page_fault(addr, PGERR_write_access); /* write fault */
return X86EMUL_PROPAGATE_FAULT;
}
return X86EMUL_CONTINUE;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/acm_ops.c
--- a/xen/common/acm_ops.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/acm_ops.c Fri Jun 23 15:33:25 2006 -0600
@@ -69,7 +69,7 @@ long do_acm_op(int cmd, XEN_GUEST_HANDLE
return -EACCES;
rc = acm_set_policy(setpolicy.pushcache,
- setpolicy.pushcache_size, 1);
+ setpolicy.pushcache_size);
break;
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/dom0_ops.c Fri Jun 23 15:33:25 2006 -0600
@@ -693,6 +693,21 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
break;
#endif
+ case DOM0_SETTIMEOFFSET:
+ {
+ struct domain *d;
+
+ ret = -ESRCH;
+ d = find_domain_by_id(op->u.settimeoffset.domain);
+ if ( d != NULL )
+ {
+ d->time_offset_seconds = op->u.settimeoffset.time_offset_seconds;
+ put_domain(d);
+ ret = 0;
+ }
+ }
+ break;
+
default:
ret = arch_do_dom0_op(op, u_dom0_op);
break;
@@ -701,9 +716,9 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
spin_unlock(&dom0_lock);
if (!ret)
- acm_post_dom0_op(op, ssid);
+ acm_post_dom0_op(op, &ssid);
else
- acm_fail_dom0_op(op, ssid);
+ acm_fail_dom0_op(op, &ssid);
return ret;
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/domain.c
--- a/xen/common/domain.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/domain.c Fri Jun 23 15:33:25 2006 -0600
@@ -234,7 +234,7 @@ void __domain_crash(struct domain *d)
{
printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
d->domain_id, current->vcpu_id, smp_processor_id());
- show_registers(guest_cpu_user_regs());
+ show_execution_state(guest_cpu_user_regs());
}
else
{
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/kernel.c
--- a/xen/common/kernel.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/kernel.c Fri Jun 23 15:33:25 2006 -0600
@@ -96,10 +96,11 @@ char *print_tainted(char *str)
{
if ( tainted )
{
- snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c",
+ snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c%c",
tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
- tainted & TAINT_BAD_PAGE ? 'B' : ' ');
+ tainted & TAINT_BAD_PAGE ? 'B' : ' ',
+ tainted & TAINT_SYNC_CONSOLE ? 'C' : ' ');
}
else
{
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/keyhandler.c Fri Jun 23 15:33:25 2006 -0600
@@ -87,10 +87,28 @@ static void show_handlers(unsigned char
key_table[i].desc);
}
+static void __dump_execstate(void *unused)
+{
+ dump_execution_state();
+}
+
static void dump_registers(unsigned char key, struct cpu_user_regs *regs)
{
+ unsigned int cpu;
+
printk("'%c' pressed -> dumping registers\n", key);
- show_registers(regs);
+
+ /* Get local execution state out immediately, in case we get stuck. */
+ printk("\n*** Dumping CPU%d state: ***\n", smp_processor_id());
+ show_execution_state(regs);
+
+ for_each_online_cpu ( cpu )
+ {
+ if ( cpu == smp_processor_id() )
+ continue;
+ printk("\n*** Dumping CPU%d state: ***\n", cpu);
+ on_selected_cpus(cpumask_of_cpu(cpu), __dump_execstate, NULL, 1, 1);
+ }
}
static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/memory.c
--- a/xen/common/memory.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/memory.c Fri Jun 23 15:33:25 2006 -0600
@@ -282,7 +282,7 @@ memory_exchange(XEN_GUEST_HANDLE(xen_mem
LIST_HEAD(in_chunk_list);
LIST_HEAD(out_chunk_list);
unsigned long in_chunk_order, out_chunk_order;
- unsigned long gpfn, gmfn, mfn;
+ xen_pfn_t gpfn, gmfn, mfn;
unsigned long i, j, k;
unsigned int memflags = 0;
long rc = 0;
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/sched_credit.c Fri Jun 23 15:33:25 2006 -0600
@@ -967,9 +967,6 @@ csched_load_balance(int cpu, struct csch
if ( peer_cpu == cpu )
break;
- BUG_ON( peer_cpu >= csched_priv.ncpus );
- BUG_ON( peer_cpu == cpu );
-
/*
* Get ahold of the scheduler lock for this peer CPU.
*
@@ -1072,7 +1069,6 @@ csched_schedule(s_time_t now)
ret.task = snext->vcpu;
CSCHED_VCPU_CHECK(ret.task);
- BUG_ON( !vcpu_runnable(ret.task) );
return ret;
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/sched_sedf.c Fri Jun 23 15:33:25 2006 -0600
@@ -360,24 +360,23 @@ static int sedf_init_vcpu(struct vcpu *v
INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
}
+ /* Every VCPU gets an equal share of extratime by default. */
+ inf->deadl_abs = 0;
+ inf->latency = 0;
+ inf->status = EXTRA_AWARE | SEDF_ASLEEP;
+ inf->extraweight = 1;
+
if ( v->domain->domain_id == 0 )
{
- /*set dom0 to something useful to boot the machine*/
+ /* Domain0 gets 75% guaranteed (15ms every 20ms). */
inf->period = MILLISECS(20);
inf->slice = MILLISECS(15);
- inf->latency = 0;
- inf->deadl_abs = 0;
- inf->status = EXTRA_AWARE | SEDF_ASLEEP;
}
else
{
- /*other domains run in best effort mode*/
+ /* Best-effort extratime only. */
inf->period = WEIGHT_PERIOD;
inf->slice = 0;
- inf->deadl_abs = 0;
- inf->latency = 0;
- inf->status = EXTRA_AWARE | SEDF_ASLEEP;
- inf->extraweight = 1;
}
inf->period_orig = inf->period; inf->slice_orig = inf->slice;
@@ -609,7 +608,16 @@ static void desched_extra_dom(s_time_t n
PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n",
inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id,
inf->short_block_lost_tot);
+#if 0
+ /*
+ * KAF: If we don't exit short-blocking state at this point
+ * domain0 can steal all CPU for up to 10 seconds before
+ * scheduling settles down (when competing against another
+ * CPU-bound domain). Doing this seems to make things behave
+ * nicely. Noone gets starved by default.
+ */
if ( inf->short_block_lost_tot <= 0 )
+#endif
{
PRINT(4,"Domain %i.%i compensated short block loss!\n",
inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/schedule.c
--- a/xen/common/schedule.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/common/schedule.c Fri Jun 23 15:33:25 2006 -0600
@@ -33,8 +33,8 @@
extern void arch_getdomaininfo_ctxt(struct vcpu *,
struct vcpu_guest_context *);
-/* opt_sched: scheduler - default to SEDF */
-static char opt_sched[10] = "sedf";
+/* opt_sched: scheduler - default to credit */
+static char opt_sched[10] = "credit";
string_param("sched", opt_sched);
#define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */
diff -r 59d4c1863330 -r fdf25330e4a6 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/drivers/char/console.c Fri Jun 23 15:33:25 2006 -0600
@@ -476,7 +476,11 @@ void init_console(void)
if ( strncmp(p, "com", 3) == 0 )
sercon_handle = serial_parse_handle(p);
else if ( strncmp(p, "vga", 3) == 0 )
+ {
vgacon_enabled = 1;
+ if ( strncmp(p+3, "[keep]", 6) == 0 )
+ vgacon_enabled++;
+ }
}
init_vga();
@@ -497,14 +501,47 @@ void init_console(void)
if ( opt_sync_console )
{
serial_start_sync(sercon_handle);
+ add_taint(TAINT_SYNC_CONSOLE);
printk("Console output is synchronous.\n");
}
}
-void console_endboot(int disable_vga)
-{
- if ( disable_vga )
- vgacon_enabled = 0;
+void console_endboot(void)
+{
+ int i, j;
+
+ if ( opt_sync_console )
+ {
+ printk("**********************************************\n");
+ printk("******* WARNING: CONSOLE OUTPUT IS SYCHRONOUS\n");
+ printk("******* This option is intended to aid debugging "
+ "of Xen by ensuring\n");
+ printk("******* that all output is synchronously delivered "
+ "on the serial line.\n");
+ printk("******* However it can introduce SIGNIFICANT latencies "
+ "and affect\n");
+ printk("******* timekeeping. It is NOT recommended for "
+ "production use!\n");
+ printk("**********************************************\n");
+ for ( i = 0; i < 3; i++ )
+ {
+ printk("%d... ", 3-i);
+ for ( j = 0; j < 100; j++ )
+ {
+ if ( softirq_pending(smp_processor_id()) )
+ do_softirq();
+ mdelay(10);
+ }
+ }
+ printk("\n");
+ }
+
+ if ( vgacon_enabled )
+ {
+ vgacon_enabled--;
+ printk("Xen is %s VGA console.\n",
+ vgacon_enabled ? "keeping" : "relinquishing");
+ }
/*
* If user specifies so, we fool the switch routine to redirect input
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/acm/acm_core.h
--- a/xen/include/acm/acm_core.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/acm/acm_core.h Fri Jun 23 15:33:25 2006 -0600
@@ -121,10 +121,11 @@ int acm_init_domain_ssid(domid_t id, ssi
int acm_init_domain_ssid(domid_t id, ssidref_t ssidref);
void acm_free_domain_ssid(struct acm_ssid_domain *ssid);
int acm_init_binary_policy(u32 policy_code);
-int acm_set_policy(void *buf, u32 buf_size, int isuserbuffer);
-int acm_get_policy(void *buf, u32 buf_size);
-int acm_dump_statistics(void *buf, u16 buf_size);
-int acm_get_ssid(ssidref_t ssidref, u8 *buf, u16 buf_size);
+int acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size);
+int do_acm_set_policy(void *buf, u32 buf_size);
+int acm_get_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size);
+int acm_dump_statistics(XEN_GUEST_HANDLE(void) buf, u16 buf_size);
+int acm_get_ssid(ssidref_t ssidref, XEN_GUEST_HANDLE(void) buf, u16 buf_size);
int acm_get_decision(ssidref_t ssidref1, ssidref_t ssidref2, u32 hook);
int acm_set_policy_reference(u8 * buf, u32 buf_size);
int acm_dump_policy_reference(u8 *buf, u32 buf_size);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/acm/acm_hooks.h
--- a/xen/include/acm/acm_hooks.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/acm/acm_hooks.h Fri Jun 23 15:33:25 2006 -0600
@@ -273,7 +273,12 @@ static inline void acm_post_dom0_op(stru
op->u.createdomain.domain, op->u.createdomain.ssidref);
break;
case DOM0_DESTROYDOMAIN:
- acm_post_domain_destroy(ssid, op->u.destroydomain.domain);
+ if (*ssid == NULL) {
+ printkd("%s: ERROR. SSID unset.\n",
+ __func__);
+ break;
+ }
+ acm_post_domain_destroy(*ssid, op->u.destroydomain.domain);
/* free security ssid for the destroyed domain (also if null policy */
acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid));
*ssid = NULL;
@@ -281,13 +286,22 @@ static inline void acm_post_dom0_op(stru
}
}
-static inline void acm_fail_dom0_op(struct dom0_op *op, void *ssid)
+static inline void acm_fail_dom0_op(struct dom0_op *op, void **ssid)
{
switch(op->cmd) {
case DOM0_CREATEDOMAIN:
acm_fail_domain_create(
current->domain->ssid, op->u.createdomain.ssidref);
break;
+ case DOM0_DESTROYDOMAIN:
+ /* we don't handle domain destroy failure but at least free the ssid */
+ if (*ssid == NULL) {
+ printkd("%s: ERROR. SSID unset.\n",
+ __func__);
+ break;
+ }
+ acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid));
+ *ssid = NULL;
}
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-ia64/debugger.h
--- a/xen/include/asm-ia64/debugger.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-ia64/debugger.h Fri Jun 23 15:33:25 2006 -0600
@@ -41,6 +41,14 @@
#include <xen/gdbstub.h>
void show_registers(struct cpu_user_regs *regs);
+void dump_stack(void);
+
+static inline void
+show_execution_state(struct cpu_user_regs *regs)
+{
+ show_registers(regs);
+ dump_stack();
+}
// NOTE: on xen struct pt_regs = struct cpu_user_regs
// see include/asm-ia64/linux-xen/asm/ptrace.h
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-ia64/vmx.h
--- a/xen/include/asm-ia64/vmx.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-ia64/vmx.h Fri Jun 23 15:33:25 2006 -0600
@@ -42,6 +42,7 @@ extern void vmx_save_state(struct vcpu *
extern void vmx_save_state(struct vcpu *v);
extern void vmx_load_state(struct vcpu *v);
extern void show_registers(struct pt_regs *regs);
+#define show_execution_state show_registers
extern int vmx_build_physmap_table(struct domain *d);
extern unsigned long __gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
extern void sync_split_caches(void);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-ia64/xenprocessor.h
--- a/xen/include/asm-ia64/xenprocessor.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-ia64/xenprocessor.h Fri Jun 23 15:33:25 2006 -0600
@@ -237,4 +237,6 @@ typedef union {
u64 itir;
} ia64_itir_t;
+#define dump_execution_state() printk("FIXME: implement ia64 dump_execution_state()\n");
+
#endif // _ASM_IA64_XENPROCESSOR_H
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/hvm/support.h Fri Jun 23 15:33:25 2006 -0600
@@ -132,7 +132,7 @@ extern unsigned int opt_hvm_debug_level;
#define __hvm_bug(regs) \
do { \
printk("__hvm_bug at %s:%d\n", __FILE__, __LINE__); \
- show_registers(regs); \
+ show_execution_state(regs); \
domain_crash_synchronous(); \
} while (0)
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/mm.h Fri Jun 23 15:33:25 2006 -0600
@@ -103,13 +103,11 @@ struct page_info
#define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift)
#define PGT_mfn_mask (((1U<<23)-1) | PGT_high_mfn_mask)
#define PGT_high_mfn_nx (0x800UL << PGT_high_mfn_shift)
-#define PGT_pae_idx_shift PGT_high_mfn_shift
#else
/* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
#define PGT_mfn_mask ((1U<<23)-1)
/* NX for PAE xen is not supported yet */
#define PGT_high_mfn_nx (1ULL << 63)
-#define PGT_pae_idx_shift 23
#endif
#define PGT_score_shift 23
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/processor.h Fri Jun 23 15:33:25 2006 -0600
@@ -128,6 +128,13 @@
/* 'arch_vcpu' flags values */
#define _TF_kernel_mode 0
#define TF_kernel_mode (1<<_TF_kernel_mode)
+
+/* #PF error code values. */
+#define PGERR_page_present (1U<<0)
+#define PGERR_write_access (1U<<1)
+#define PGERR_user_mode (1U<<2)
+#define PGERR_reserved_bit (1U<<3)
+#define PGERR_instr_fetch (1U<<4)
#ifndef __ASSEMBLY__
@@ -522,10 +529,16 @@ extern always_inline void prefetchw(cons
#endif
void show_stack(struct cpu_user_regs *regs);
+void show_stack_overflow(unsigned long esp);
void show_registers(struct cpu_user_regs *regs);
+void show_execution_state(struct cpu_user_regs *regs);
void show_page_walk(unsigned long addr);
-int __spurious_page_fault(unsigned long addr);
asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
+
+/* Dumps current register and stack state. */
+#define dump_execution_state() \
+ /* NB. Needs interrupts enabled else we end up in fatal_trap(). */ \
+ __asm__ __volatile__ ( "pushf ; sti ; ud2 ; .ascii \"dbg\" ; popf" )
extern void mtrr_ap_init(void);
extern void mtrr_bp_init(void);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/shadow.h Fri Jun 23 15:33:25 2006 -0600
@@ -112,6 +112,30 @@ do {
} while (0)
#endif
+#if CONFIG_PAGING_LEVELS >= 3
+static inline u64 get_cr3_idxval(struct vcpu *v)
+{
+ u64 pae_cr3;
+
+ if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 &&
+ !shadow_mode_log_dirty(v->domain) )
+ {
+ pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
+ return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
+ }
+ else
+ return 0;
+}
+
+#define shadow_key_t u64
+#define index_to_key(x) ((x) << 32)
+#else
+#define get_cr3_idxval(v) (0)
+#define shadow_key_t unsigned long
+#define index_to_key(x) (0)
+#endif
+
+
#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
#define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
#define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))
@@ -309,7 +333,7 @@ extern unsigned long get_mfn_from_gpfn_f
struct shadow_status {
struct shadow_status *next; /* Pull-to-front list per hash bucket. */
- unsigned long gpfn_and_flags; /* Guest pfn plus flags. */
+ shadow_key_t gpfn_and_flags; /* Guest pfn plus flags. */
unsigned long smfn; /* Shadow mfn. */
};
@@ -1180,7 +1204,13 @@ static inline unsigned long __shadow_sta
struct domain *d, unsigned long gpfn, unsigned long stype)
{
struct shadow_status *p, *x, *head;
- unsigned long key = gpfn | stype;
+ shadow_key_t key;
+#if CONFIG_PAGING_LEVELS >= 3
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 && stype == PGT_l4_shadow )
+ key = gpfn | stype | index_to_key(get_cr3_idxval(current));
+ else
+#endif
+ key = gpfn | stype;
ASSERT(shadow_lock_is_acquired(d));
ASSERT(gpfn == (gpfn & PGT_mfn_mask));
@@ -1295,10 +1325,11 @@ shadow_max_pgtable_type(struct domain *d
}
static inline void delete_shadow_status(
- struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
+ struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype, u64 index)
{
struct shadow_status *p, *x, *n, *head;
- unsigned long key = gpfn | stype;
+
+ shadow_key_t key = gpfn | stype | index_to_key(index);
ASSERT(shadow_lock_is_acquired(d));
ASSERT(!(gpfn & ~PGT_mfn_mask));
@@ -1374,11 +1405,12 @@ static inline void delete_shadow_status(
static inline void set_shadow_status(
struct domain *d, unsigned long gpfn, unsigned long gmfn,
- unsigned long smfn, unsigned long stype)
+ unsigned long smfn, unsigned long stype, u64 index)
{
struct shadow_status *x, *head, *extra;
int i;
- unsigned long key = gpfn | stype;
+
+ shadow_key_t key = gpfn | stype | index_to_key(index);
SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/shadow_64.h Fri Jun 23 15:33:25 2006 -0600
@@ -36,9 +36,9 @@
*/
extern struct shadow_ops MODE_64_2_HANDLER;
extern struct shadow_ops MODE_64_3_HANDLER;
+extern struct shadow_ops MODE_64_PAE_HANDLER;
#if CONFIG_PAGING_LEVELS == 4
extern struct shadow_ops MODE_64_4_HANDLER;
-extern struct shadow_ops MODE_64_PAE_HANDLER;
#endif
#if CONFIG_PAGING_LEVELS == 3
@@ -65,10 +65,6 @@ typedef struct { intpte_t l4; } l4_pgent
#define ESH_LOG(_f, _a...) ((void)0)
#endif
-#define PAGING_L4 4UL
-#define PAGING_L3 3UL
-#define PAGING_L2 2UL
-#define PAGING_L1 1UL
#define L_MASK 0xff
#define PAE_PAGING_LEVELS 3
@@ -108,18 +104,14 @@ typedef struct { intpte_t lo; } pgentry_
#define entry_has_changed(x,y,flags) \
( !!(((x).lo ^ (y).lo) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+/******************************************************************************/
+/*
+ * The macro and inlines are for 32-bit PAE guest
+ */
+#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2,1] */
+
#define PAE_SHADOW_SELF_ENTRY 259
#define PAE_L3_PAGETABLE_ENTRIES 4
-
-/******************************************************************************/
-/*
- * The macro and inlines are for 32-bit PAE guest on 64-bit host
- */
-#define PAE_CR3_ALIGN 5
-#define PAE_CR3_IDX_MASK 0x7f
-#define PAE_CR3_IDX_NO 128
-
-#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2,1] */
/******************************************************************************/
static inline int table_offset_64(unsigned long va, int level)
@@ -186,19 +178,10 @@ static inline int guest_table_offset_64(
}
}
-static inline unsigned long get_cr3_idxval(struct vcpu *v)
-{
- unsigned long pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
-
- return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
-}
-
-
#define SH_GUEST_32PAE 1
#else
#define guest_table_offset_64(va, level, index) \
table_offset_64((va),(level))
-#define get_cr3_idxval(v) 0
#define SH_GUEST_32PAE 0
#endif
@@ -514,7 +497,10 @@ static inline void entry_general(
l1_p =(pgentry_64_t *)map_domain_page(smfn);
for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
- entry_remove_flags(l1_p[i], _PAGE_RW);
+ {
+ if ( mfn_is_page_table(entry_get_pfn(l1_p[i])) )
+ entry_remove_flags(l1_p[i], _PAGE_RW);
+ }
unmap_domain_page(l1_p);
}
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/shadow_ops.h
--- a/xen/include/asm-x86/shadow_ops.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/asm-x86/shadow_ops.h Fri Jun 23 15:33:25 2006 -0600
@@ -21,6 +21,14 @@
#ifndef _XEN_SHADOW_OPS_H
#define _XEN_SHADOW_OPS_H
+
+#define PAGING_L4 4UL
+#define PAGING_L3 3UL
+#define PAGING_L2 2UL
+#define PAGING_L1 1UL
+
+#define PAE_CR3_ALIGN 5
+#define PAE_CR3_IDX_MASK 0x7f
#if defined( GUEST_PGENTRY_32 )
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/public/arch-x86_32.h Fri Jun 23 15:33:25 2006 -0600
@@ -74,16 +74,23 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
*/
#ifdef CONFIG_X86_PAE
#define __HYPERVISOR_VIRT_START 0xF5800000
+#define __MACH2PHYS_VIRT_START 0xF5800000
+#define __MACH2PHYS_VIRT_END 0xF6800000
#else
#define __HYPERVISOR_VIRT_START 0xFC000000
+#define __MACH2PHYS_VIRT_START 0xFC000000
+#define __MACH2PHYS_VIRT_END 0xFC400000
#endif
#ifndef HYPERVISOR_VIRT_START
#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
#endif
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
#endif
/* Maximum number of virtual CPUs in multi-processor guests. */
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/public/arch-x86_64.h Fri Jun 23 15:33:25 2006 -0600
@@ -85,21 +85,25 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START 0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END 0xFFFF804000000000
#ifndef HYPERVISOR_VIRT_START
#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
#endif
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
/* Maximum number of virtual CPUs in multi-processor guests. */
#define MAX_VIRT_CPUS 32
#ifndef __ASSEMBLY__
-
-/* The machine->physical mapping table starts at this address, read-only. */
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
/*
* int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/public/dom0_ops.h Fri Jun 23 15:33:25 2006 -0600
@@ -513,6 +513,27 @@ struct dom0_hypercall_init {
};
typedef struct dom0_hypercall_init dom0_hypercall_init_t;
DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
+
+#define DOM0_DOMAIN_SETUP 49
+#define _XEN_DOMAINSETUP_hvm_guest 0
+#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest)
+typedef struct dom0_domain_setup {
+ domid_t domain; /* domain to be affected */
+ unsigned long flags; /* XEN_DOMAINSETUP_* */
+#ifdef __ia64__
+ unsigned long bp; /* mpaddr of boot param area */
+ unsigned long maxmem; /* Highest memory address for MDT. */
+#endif
+} dom0_domain_setup_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_domain_setup_t);
+
+#define DOM0_SETTIMEOFFSET 50
+struct dom0_settimeoffset {
+ domid_t domain;
+ int32_t time_offset_seconds; /* applied to domain wallclock time */
+};
+typedef struct dom0_settimeoffset dom0_settimeoffset_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_settimeoffset_t);
struct dom0_op {
uint32_t cmd;
@@ -555,6 +576,8 @@ struct dom0_op {
struct dom0_irq_permission irq_permission;
struct dom0_iomem_permission iomem_permission;
struct dom0_hypercall_init hypercall_init;
+ struct dom0_domain_setup domain_setup;
+ struct dom0_settimeoffset settimeoffset;
uint8_t pad[128];
} u;
};
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/memory.h
--- a/xen/include/public/memory.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/public/memory.h Fri Jun 23 15:33:25 2006 -0600
@@ -141,6 +141,20 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn
DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping 12
+struct xen_machphys_mapping {
+ unsigned long v_start, v_end; /* Start and end virtual addresses. */
+ unsigned long max_mfn; /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
+
+/*
* Sets the GPFN at which a particular page appears in the specified guest's
* pseudophysical address space.
* arg == addr of xen_add_to_physmap_t.
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/xen/console.h
--- a/xen/include/xen/console.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/xen/console.h Fri Jun 23 15:33:25 2006 -0600
@@ -15,7 +15,7 @@ long read_console_ring(XEN_GUEST_HANDLE(
long read_console_ring(XEN_GUEST_HANDLE(char), u32 *, int);
void init_console(void);
-void console_endboot(int disable_vga);
+void console_endboot(void);
void console_force_unlock(void);
void console_force_lock(void);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/xen/lib.h Fri Jun 23 15:33:25 2006 -0600
@@ -82,6 +82,7 @@ unsigned long long parse_size_and_unit(c
#define TAINT_UNSAFE_SMP (1<<0)
#define TAINT_MACHINE_CHECK (1<<1)
#define TAINT_BAD_PAGE (1<<2)
+#define TAINT_SYNC_CONSOLE (1<<3)
extern int tainted;
#define TAINT_STRING_MAX_LEN 20
extern char *print_tainted(char *str);
diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Fri Jun 23 15:26:01 2006 -0600
+++ b/xen/include/xen/sched.h Fri Jun 23 15:33:25 2006 -0600
@@ -159,6 +159,7 @@ struct domain
/* OProfile support. */
struct xenoprof *xenoprof;
+ int32_t time_offset_seconds;
};
struct domain_setup_info
diff -r 59d4c1863330 -r fdf25330e4a6 patches/linux-2.6.16.13/ipv6-no-autoconf.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/ipv6-no-autoconf.patch Fri Jun 23 15:33:25 2006 -0600
@@ -0,0 +1,23 @@
+ net/ipv6/addrconf.c | 2 ++
+ 1 files changed, 2 insertions(+)
+
+Index: build/net/ipv6/addrconf.c
+===================================================================
+--- build.orig/net/ipv6/addrconf.c
++++ build/net/ipv6/addrconf.c
+@@ -2462,6 +2462,7 @@ static void addrconf_dad_start(struct in
+ spin_lock_bh(&ifp->lock);
+
+ if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
++ !(dev->flags&IFF_MULTICAST) ||
+ !(ifp->flags&IFA_F_TENTATIVE)) {
+ ifp->flags &= ~IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+@@ -2546,6 +2547,7 @@ static void addrconf_dad_completed(struc
+ if (ifp->idev->cnf.forwarding == 0 &&
+ ifp->idev->cnf.rtr_solicits > 0 &&
+ (dev->flags&IFF_LOOPBACK) == 0 &&
++ (dev->flags & IFF_MULTICAST) &&
+ (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+ struct in6_addr all_routers;
+
diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/hw/piix4acpi.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ioemu/hw/piix4acpi.c Fri Jun 23 15:33:25 2006 -0600
@@ -0,0 +1,481 @@
+/*
+ * PIIX4 ACPI controller emulation
+ *
+ * Winston liwen Wang, winston.l.wang@xxxxxxxxx
+ * Copyright (c) 2006 , Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "vl.h"
+/* ACPI PM timer input clock in Hz (3.579545 MHz); was mistyped as 3753425. */
+#define FREQUENCE_PMTIMER 3579545
+/* acpi register bit define here */
+
+/* PM1_STS */
+#define TMROF_STS (1 << 0)
+#define BM_STS (1 << 4)
+#define GBL_STS (1 << 5)
+#define PWRBTN_STS (1 << 8)
+#define RTC_STS (1 << 10)
+#define PRBTNOR_STS (1 << 11)
+#define WAK_STS (1 << 15)
+/* PM1_EN */
+#define TMROF_EN (1 << 0)
+#define GBL_EN (1 << 5)
+#define PWRBTN_EN (1 << 8)
+#define RTC_EN (1 << 10)
+/* PM1_CNT */
+#define SCI_EN (1 << 0)
+#define GBL_RLS (1 << 2)
+#define SLP_EN (1 << 13)
+
+/* Bits of PM1a register define here */
+#define SLP_TYP_MASK 0x1C00
+#define SLP_VAL 0x1C00
+
+typedef struct AcpiDeviceState AcpiDeviceState;
+AcpiDeviceState *acpi_device_table;
+
+/* State of the emulated ACPI PM timer */
+typedef struct PMTState {
+ uint32_t count;
+ int irq;
+ uint64_t next_pm_time;
+ QEMUTimer *pm_timer;
+}PMTState;
+
+typedef struct PM1Event_BLK {
+ uint16_t pm1_status; /* pm1a_EVT_BLK */
+ uint16_t pm1_enable; /* pm1a_EVT_BLK+2 */
+}PM1Event_BLK;
+
+typedef struct PCIAcpiState {
+ PCIDevice dev;
+ uint16_t irq;
+ uint16_t pm1_status; /* pm1a_EVT_BLK */
+ uint16_t pm1_enable; /* pm1a_EVT_BLK+2 */
+ uint16_t pm1_control; /* pm1a_ECNT_BLK */
+ uint32_t pm1_timer; /* pmtmr_BLK */
+} PCIAcpiState;
+
+static PMTState *pmtimer_state;
+static PCIAcpiState *acpi_state;
+
+static void pmtimer_save(QEMUFile *f, void *opaque)
+{
+ PMTState *s = opaque;
+
+ qemu_put_be32s(f, &s->count);
+ qemu_put_be32s(f, &s->irq);
+ qemu_put_be64s(f, &s->next_pm_time);
+ qemu_put_timer(f, s->pm_timer);
+}
+
+static int pmtimer_load(QEMUFile *f, void *opaque, int version_id)
+{
+ PMTState *s = opaque;
+
+ if (version_id != 1)
+ return -EINVAL;
+ qemu_get_be32s(f, &s->count);
+ qemu_get_be32s(f, &s->irq);
+ qemu_get_be64s(f, &s->next_pm_time);
+ qemu_get_timer(f, s->pm_timer);
+ return 0;
+
+}
+
+static inline void acpi_set_irq(PCIAcpiState *s)
+{
+/* no real SCI event need for now, so comment the following line out */
+/* pic_set_irq(s->irq, 1); */
+ printf("acpi_set_irq: s->irq %x \n",s->irq);
+}
+
+/*
+ * Timer callback: advance the emulated ACPI PM timer by one tick.
+ *
+ * opaque is the PMTState this callback was registered with.  Each call
+ * re-arms the QEMU timer at the updated next_pm_time and increments
+ * acpi_state->pm1_timer.  When the counter reaches 0x1000000 it is wrapped
+ * to zero, TMROF_STS is latched in pm1_status and, if TMROF_EN is set,
+ * acpi_set_irq() is called once.
+ */
+static void pm_timer_update(void *opaque)
+{
+    PMTState *s = opaque;
+    s->next_pm_time += muldiv64(1, ticks_per_sec,FREQUENCE_PMTIMER);
+    qemu_mod_timer(s->pm_timer, s->next_pm_time);
+    acpi_state->pm1_timer ++;
+
+    /* The counter is treated as 24 bits wide: wrap to zero on overflow. */
+    if (acpi_state->pm1_timer >= 0x1000000) {
+/* printf("pm_timerupdate: timer overflow: %x \n", acpi_state->pm1_timer); */
+
+        acpi_state->pm1_timer = 0;
+        acpi_state->pm1_status = acpi_state->pm1_status | TMROF_STS;
+        /* If TMROF_EN is set then send the irq. */
+        if ((acpi_state->pm1_enable & TMROF_EN) == TMROF_EN) {
+            acpi_set_irq(acpi_state);
+            /*
+             * Only need one time: drop TMROF_EN but keep the other enable
+             * bits (the old '= 0x00' also wiped GBL_EN/PWRBTN_EN/RTC_EN).
+             */
+            acpi_state->pm1_enable &= ~TMROF_EN;
+        }
+    }
+    s->count = acpi_state->pm1_timer;
+}
+
+static PMTState *pmtimer_init(void)
+{
+ PMTState *s;
+
+ s = qemu_mallocz(sizeof(PMTState));
+ if (!s)
+ return NULL;
+
+ /* s->irq = irq; */
+
+ s->pm_timer = qemu_new_timer(vm_clock, pm_timer_update, s);
+
+ s->count = 0;
+ s->next_pm_time = qemu_get_clock(vm_clock) + muldiv64(1, ticks_per_sec,FREQUENCE_PMTIMER) + 1;
+ qemu_mod_timer(s->pm_timer, s->next_pm_time);
+
+ register_savevm("pm timer", 1, 1, pmtimer_save, pmtimer_load, s);
+ return s;
+}
+
+static void acpi_reset(PCIAcpiState *s)
+{
+ uint8_t *pci_conf;
+ pci_conf = s->dev.config;
+
+ pci_conf[0x42] = 0x00;
+ pci_conf[0x43] = 0x00;
+ s->irq = 9;
+ s->pm1_status = 0;
+ s->pm1_enable = 0x00; /* TMROF_EN should cleared */
+ s->pm1_control = SCI_EN; /* SCI_EN */
+ s->pm1_timer = 0;
+}
+
+/*byte access */
+/*
+ * Byte write to the low byte of the PM1a status register.
+ *
+ * Writing 1 to TMROF_STS or GBL_STS clears that bit in pm1_status; all
+ * other status bits are left untouched.  addr is unused.
+ */
+static void acpiPm1Status_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    /* Must be '~' (bitwise NOT): '& !MASK' is '& 0' and wiped the register. */
+    if ((val&TMROF_STS)==TMROF_STS)
+        s->pm1_status = s->pm1_status & ~TMROF_STS;
+
+    if ((val&GBL_STS)==GBL_STS)
+        s->pm1_status = s->pm1_status & ~GBL_STS;
+
+/* printf("acpiPm1Status_writeb \n addr %x val:%x pm1_status:%x \n", addr, val,s->pm1_status); */
+}
+
+static uint32_t acpiPm1Status_readb(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = s->pm1_status;
+/* printf("acpiPm1Status_readb \n addr %x val:%x\n", addr, val); */
+
+ return val;
+}
+
+/*
+ * Byte write to the high byte (bits 15:8) of the PM1a status register.
+ *
+ * val carries the byte written by the guest.  Each bit written as 1 clears
+ * the corresponding bit in the upper half of pm1_status; the low byte is
+ * not touched.  addr is unused.
+ *
+ * The previous code used logical '||', which collapsed the whole register
+ * to 0 or 1.
+ * NOTE(review): write-one-to-clear is chosen to match the low-byte handler
+ * acpiPm1Status_writeb and the ACPI PM1 status definition - confirm.
+ */
+static void acpiPm1StatusP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_status &= ~((val & 0xff) << 8);
+/* printf("acpiPm1StatusP1_writeb \n addr %x val:%x\n", addr, val); */
+}
+
+static uint32_t acpiPm1StatusP1_readb(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = (s->pm1_status)>>8;
+ printf("acpiPm1StatusP1_readb \n addr %x val:%x\n", addr, val);
+
+ return val;
+}
+
+/*
+ * Byte write to the low byte (bits 7:0) of the PM1a enable register.
+ *
+ * Only the low byte of pm1_enable is replaced; the high byte (PWRBTN_EN,
+ * RTC_EN) is preserved.  The previous plain assignment zeroed the high byte
+ * on every byte write.  addr is unused.
+ */
+static void acpiPm1Enable_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = (s->pm1_enable & 0xff00) | (val & 0xff);
+/* printf("acpiPm1Enable_writeb \n addr %x val:%x\n", addr, val); */
+}
+
+/*
+ * Byte read of the low byte (bits 7:0) of the PM1a enable register.
+ *
+ * Returns the low byte of pm1_enable.  addr is unused.
+ *
+ * The previous expression used logical '|| 0x1' and therefore always
+ * returned 1 regardless of the register contents.
+ * NOTE(review): if the intent was to always report TMROF_EN as set, use
+ * '| 0x1' instead - confirm.
+ */
+static uint32_t acpiPm1Enable_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_enable) & 0xff;
+/* printf("acpiPm1Enable_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+/*
+ * Byte write to the high byte (bits 15:8) of the PM1a enable register.
+ *
+ * The written byte replaces the upper half of pm1_enable and the low byte
+ * is preserved, so a following acpiPm1EnableP1_readb returns what was
+ * written.  The previous code used logical '||', which collapsed the whole
+ * register to 0 or 1.  addr is unused.
+ */
+static void acpiPm1EnableP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = (s->pm1_enable & 0x00ff) | ((val & 0xff) << 8);
+/* printf("acpiPm1EnableP1_writeb \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1EnableP1_readb(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = (s->pm1_enable)>>8;
+/* printf("acpiPm1EnableP1_readb \n addr %x val:%x\n", addr, val); */
+
+ return val;
+}
+
+/*
+ * Byte write to the low byte (bits 7:0) of the PM1a control register.
+ *
+ * Only the low byte of pm1_control is replaced; the high byte (which holds
+ * SLP_TYP and SLP_EN) is preserved.  The previous plain assignment zeroed
+ * the high byte on every byte write.  addr is unused.
+ */
+static void acpiPm1Control_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = (s->pm1_control & 0xff00) | (val & 0xff);
+/* printf("acpiPm1Control_writeb \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Control_readb(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = s->pm1_control;
+/* printf("acpiPm1Control_readb \n addr %x val:%x\n", addr, val); */
+
+ return val;
+}
+
+/*
+ * Byte write to the high byte (bits 15:8) of the PM1a control register.
+ *
+ * The written byte replaces the upper half of pm1_control; the low byte is
+ * preserved.  If the write sets SLP_EN with SLP_TYP == SLP_VAL, the ACPI
+ * timer is cleared and a system shutdown is requested.  addr is unused.
+ *
+ * Two defects are fixed here: the merge used logical '||' (collapsing the
+ * register to 0 or 1), and the power-off test compared the raw byte against
+ * SLP_EN / SLP_TYP_MASK, which are bits 13 and 12:10 of the 16-bit register
+ * and so could never match an unshifted byte.
+ */
+static void acpiPm1ControlP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    /* Move the byte into its position within the 16-bit register. */
+    val = (val & 0xff) << 8;
+    s->pm1_control = (s->pm1_control & 0x00ff) | val;
+/* printf("acpiPm1ControlP1_writeb \n addr %x val:%x\n", addr, val); */
+
+    // Check for power off request
+
+    if (((val & SLP_EN) != 0) &&
+        ((val & SLP_TYP_MASK) == SLP_VAL)) {
+        s->pm1_timer=0x0; //clear ACPI timer
+        qemu_system_shutdown_request();
+    }
+}
+
+static uint32_t acpiPm1ControlP1_readb(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = (s->pm1_control)>>8;
+/* printf("acpiPm1ControlP1_readb \n addr %x val:%x\n", addr, val); */
+
+ return val;
+}
+
+
+/* word access */
+
+/*
+ * Word write to the PM1a status register.
+ *
+ * Writing 1 to TMROF_STS or GBL_STS clears that bit in pm1_status; all
+ * other status bits are left untouched.  addr is unused.
+ */
+static void acpiPm1Status_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    /* Must be '~' (bitwise NOT): '& !MASK' is '& 0' and wiped the register. */
+    if ((val&TMROF_STS)==TMROF_STS)
+        s->pm1_status = s->pm1_status & ~TMROF_STS;
+
+    if ((val&GBL_STS)==GBL_STS)
+        s->pm1_status = s->pm1_status & ~GBL_STS;
+
+/* printf("acpiPm1Status_writew \n addr %x val:%x pm1_status:%x \n", addr, val,s->pm1_status); */
+}
+
+static uint32_t acpiPm1Status_readw(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = s->pm1_status;
+/* printf("acpiPm1Status_readw \n addr %x val:%x\n", addr, val); */
+
+ return val;
+}
+
+static void acpiPm1Enable_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+ PCIAcpiState *s = opaque;
+
+ s->pm1_enable = val;
+/* printf("acpiPm1Enable_writew \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Enable_readw(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = s->pm1_enable;
+/* printf("acpiPm1Enable_readw \n addr %x val:%x\n", addr, val); */
+
+ return val;
+}
+
+static void acpiPm1Control_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+ PCIAcpiState *s = opaque;
+
+ s->pm1_control = val;
+/* printf("acpiPm1Control_writew \n addr %x val:%x\n", addr, val); */
+
+ // Check for power off request
+
+ if (((val & SLP_EN) != 0) &&
+ ((val & SLP_TYP_MASK) == SLP_VAL)) {
+ qemu_system_shutdown_request();
+ }
+
+}
+
+static uint32_t acpiPm1Control_readw(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = s->pm1_control;
+/* printf("acpiPm1Control_readw \n addr %x val:%x\n", addr, val); */
+
+ return val;
+}
+
+/* dword access */
+
+static void acpiPm1Event_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+ PCIAcpiState *s = opaque;
+
+ s->pm1_status = val;
+ s->pm1_enable = val>>16;
+/* printf("acpiPm1Event_writel \n addr %x val:%x \n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Event_readl(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = s->pm1_status|(s->pm1_enable<<16);
+/* printf("acpiPm1Event_readl \n addr %x val:%x\n", addr, val); */
+
+ return val;
+}
+
+static void acpiPm1Timer_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+ PCIAcpiState *s = opaque;
+
+ s->pm1_timer = val;
+/* printf("acpiPm1Timer_writel \n addr %x val:%x\n", addr, val); */
+}
+
+static uint32_t acpiPm1Timer_readl(void *opaque, uint32_t addr)
+{
+ PCIAcpiState *s = opaque;
+ uint32_t val;
+
+ val = s->pm1_timer;
+/* printf("acpiPm1Timer_readl \n addr %x val:%x\n", addr, val); */
+ return val;
+}
+
+static void acpi_map(PCIDevice *pci_dev, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ PCIAcpiState *d = (PCIAcpiState *)pci_dev;
+
+ printf("register acpi io\n");
+
+ /* Byte access */
+ register_ioport_write(addr, 1, 1, acpiPm1Status_writeb, d);
+ register_ioport_read(addr, 1, 1, acpiPm1Status_readb, d);
+ register_ioport_write(addr+1, 1, 1, acpiPm1StatusP1_writeb, d);
+ register_ioport_read(addr+1, 1, 1, acpiPm1StatusP1_readb, d);
+
+ register_ioport_write(addr + 2, 1, 1, acpiPm1Enable_writeb, d);
+ register_ioport_read(addr + 2, 1, 1, acpiPm1Enable_readb, d);
+ register_ioport_write(addr + 2 +1, 1, 1, acpiPm1EnableP1_writeb, d);
+ register_ioport_read(addr + 2 +1, 1, 1, acpiPm1EnableP1_readb, d);
+
+ register_ioport_write(addr + 4, 1, 1, acpiPm1Control_writeb, d);
+ register_ioport_read(addr + 4, 1, 1, acpiPm1Control_readb, d);
+ register_ioport_write(addr + 4 + 1, 1, 1, acpiPm1ControlP1_writeb, d);
+ register_ioport_read(addr + 4 +1, 1, 1, acpiPm1ControlP1_readb, d);
+
+ /* Word access */
+ register_ioport_write(addr, 2, 2, acpiPm1Status_writew, d);
+ register_ioport_read(addr, 2, 2, acpiPm1Status_readw, d);
+
+ register_ioport_write(addr + 2, 2, 2, acpiPm1Enable_writew, d);
+ register_ioport_read(addr + 2, 2, 2, acpiPm1Enable_readw, d);
+
+ register_ioport_write(addr + 4, 2, 2, acpiPm1Control_writew, d);
+ register_ioport_read(addr + 4, 2, 2, acpiPm1Control_readw, d);
+
+ /* DWord access */
+ register_ioport_write(addr, 4, 4, acpiPm1Event_writel, d);
+ register_ioport_read(addr, 4, 4, acpiPm1Event_readl, d);
+
+ register_ioport_write(addr + 8, 4, 4, acpiPm1Timer_writel, d);
+ register_ioport_read(addr + 8, 4, 4, acpiPm1Timer_readl, d);
+}
+
+/* PIIX4 acpi pci configuration space, func 3 */
+void pci_piix4_acpi_init(PCIBus *bus)
+{
+ PCIAcpiState *d;
+ uint8_t *pci_conf;
+
+ /* register a function 3 of PIIX4 */
+ d = (PCIAcpiState *)pci_register_device(
+ bus, "PIIX4 ACPI", sizeof(PCIAcpiState),
+ ((PCIDevice *)piix3_state)->devfn + 3, NULL, NULL);
+
+ acpi_state = d;
+ pci_conf = d->dev.config;
+ pci_conf[0x00] = 0x86; /* Intel */
+ pci_conf[0x01] = 0x80;
+ pci_conf[0x02] = 0x13;
+ pci_conf[0x03] = 0x71;
+ pci_conf[0x08] = 0x01; /* B0 stepping */
+ pci_conf[0x09] = 0x00; /* base class */
+ pci_conf[0x0a] = 0x80; /* Sub class */
+ pci_conf[0x0b] = 0x06;
+ pci_conf[0x0e] = 0x00;
+ pci_conf[0x3d] = 0x01; /* Hardwired to PIRQA is used */
+
+ pci_register_io_region((PCIDevice *)d, 4, 0x10,
+ PCI_ADDRESS_SPACE_IO, acpi_map);
+ pmtimer_state = pmtimer_init();
+ acpi_reset (d);
+}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/util/SSHTransport.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/SSHTransport.py Fri Jun 23 15:33:25 2006 -0600
@@ -0,0 +1,102 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2006 Anthony Liguori <aliguori@xxxxxxxxxx>
+# Copyright (C) 2006 XenSource Inc.
+#============================================================================
+
+"""
+XML-RPC SSH transport.
+"""
+
+from xmlrpclib import getparser, Fault
+from subprocess import Popen, PIPE
+from getpass import getuser
+from fcntl import ioctl
+import errno
+import os
+import termios
+
+
+def getHTTPURI(uri):
+ (protocol, rest) = uri.split(':', 1)
+ if not rest.startswith('//'):
+ raise ValueError("Invalid ssh URL '%s'" % uri)
+ rest = rest[2:]
+ user = getuser()
+ path = 'RPC2'
+ if rest.find('@') != -1:
+ (user, rest) = rest.split('@', 1)
+ if rest.find('/') != -1:
+ (host, rest) = rest.split('/', 1)
+ if len(rest) > 0:
+ path = rest
+ else:
+ host = rest
+ transport = SSHTransport(host, user)
+ uri = 'http://%s/%s' % (host, path)
+ return transport, uri
+
+
+class SSHTransport(object):
+    """XML-RPC transport that sends HTTP requests over an ssh child process.
+
+    The child ('ssh user@host xm serve') is started on the first request
+    and reused afterwards.
+    """
+
+    def __init__(self, host, user, askpass=None):
+        self.host = host
+        self.user = user
+        self.askpass = askpass
+        self.ssh = None
+
+    def getssh(self):
+        """Return the ssh child process, starting it on first use.
+
+        Raises Fault if the ssh executable cannot be found; any other
+        OSError from Popen is re-raised unchanged.
+        """
+        if self.ssh is None:
+            if self.askpass:
+                # Point ssh at the askpass helper and issue TIOCNOTTY on
+                # /dev/tty before spawning it.
+                f = open('/dev/tty', 'w')
+                try:
+                    os.environ['SSH_ASKPASS'] = self.askpass
+                    ioctl(f.fileno(), termios.TIOCNOTTY)
+                finally:
+                    f.close()
+
+            cmd = ['ssh', '%s@%s' % (self.user, self.host), 'xm serve']
+            try:
+                self.ssh = Popen(cmd, bufsize=0, stdin=PIPE, stdout=PIPE)
+            except OSError, (err, msg):
+                if err == errno.ENOENT:
+                    raise Fault(0, "ssh executable not found!")
+                raise
+        return self.ssh
+
+    def request(self, host, handler, request_body, verbose=0):
+        """POST request_body to handler and return the parsed XML-RPC reply.
+
+        Raises Fault if the server sends no status line or a status other
+        than 200.  verbose is accepted but not used.
+        """
+        p, u = getparser()
+        ssh = self.getssh()
+        ssh.stdin.write("""POST /%s HTTP/1.1
+User-Agent: Xen
+Host: %s
+Content-Type: text/xml
+Content-Length: %d
+
+%s""" % (handler, host, len(request_body), request_body))
+        ssh.stdin.flush()
+
+        content_length = 0
+        line = ssh.stdout.readline()
+        # Split once: an empty or malformed status line (e.g. the ssh child
+        # exited) used to raise IndexError here.
+        status = line.split()
+        if len(status) < 2:
+            raise Fault(0, 'Server returned no valid HTTP status line')
+        if status[1] != '200':
+            # Join the status words; joining line[1:] spaced out every
+            # character of the raw line instead.
+            raise Fault(0, 'Server returned %s' % (' '.join(status[1:])))
+
+        # Consume the headers up to the blank line, noting Content-Length.
+        while line not in ['', '\r\n', '\n']:
+            if line.lower().startswith('content-length:'):
+                content_length = int(line[15:].strip())
+            line = ssh.stdout.readline()
+        content = ssh.stdout.read(content_length)
+        p.feed(content)
+        p.close()
+        return u.close()
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/Makefile
--- a/tools/blktap/Makefile Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-MAJOR = 3.0
-MINOR = 0
-SONAME = libblktap.so.$(MAJOR)
-
-XEN_ROOT = ../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-SUBDIRS :=
-SUBDIRS += ublkback
-#SUBDIRS += parallax
-
-BLKTAP_INSTALL_DIR = /usr/sbin
-
-INSTALL = install
-INSTALL_PROG = $(INSTALL) -m0755
-INSTALL_DIR = $(INSTALL) -d -m0755
-
-INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
-
-LIBS := -lpthread -lz
-
-SRCS :=
-SRCS += blktaplib.c xenbus.c blkif.c
-
-CFLAGS += -Werror
-CFLAGS += -Wno-unused
-CFLAGS += -fno-strict-aliasing
-CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# get asprintf():
-CFLAGS += -D _GNU_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS += -Wp,-MD,.$(@F).d
-CFLAGS += $(INCLUDES)
-DEPS = .*.d
-
-OBJS = $(patsubst %.c,%.o,$(SRCS))
-IBINS :=
-#IBINS += blkdump
-
-LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
-
-.PHONY: all
-all: mk-symlinks libblktap.so #blkdump
- @set -e; for subdir in $(SUBDIRS); do \
- $(MAKE) -C $$subdir $@; \
- done
-
-.PHONY: install
-install: all
- $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
- $(INSTALL_DIR) -p $(DESTDIR)/usr/include
- $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
- $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
- #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
- @set -e; for subdir in $(SUBDIRS); do \
- $(MAKE) -C $$subdir $@; \
- done
-
-.PHONY: clean
-clean:
- rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump
- @set -e; for subdir in $(SUBDIRS); do \
- $(MAKE) -C $$subdir $@; \
- done
-
-.PHONY: rpm
-rpm: all
- rm -rf staging
- mkdir staging
- mkdir staging/i386
- rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
- --define "_rpmdir$$PWD/staging" -bb rpm.spec
- mv staging/i386/*.rpm .
- rm -rf staging
-
-libblktap.so: $(OBJS)
- $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared \
- -L$(XEN_XENSTORE) -l xenstore \
- -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
- ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
- ln -sf libblktap.so.$(MAJOR) $@
-
-blkdump: libblktap.so
- $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
- -l blktap blkdump.c
-
-.PHONY: TAGS clean install mk-symlinks rpm
-
-.PHONY: TAGS
-TAGS:
- etags -t $(SRCS) *.h
-
--include $(DEPS)
-
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/README
--- a/tools/blktap/README Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-Block Tap User-level Interfaces
-Andrew Warfield
-andrew.warfield@xxxxxxxxxxxx
-February 8, 2005
-
-NOTE #1: The blktap is _experimental_ code. It works for me. Your
-mileage may vary. Don't use it for anything important. Please. ;)
-
-NOTE #2: All of the interfaces here are likely to change. This is all
-early code, and I am checking it in because others want to play with
-it. If you use it for anything, please let me know!
-
-Overview:
----------
-
-This directory contains a library and set of example applications for
-the block tap device. The block tap hooks into the split block device
-interfaces above Xen allowing them to be extended. This extension can
-be done in userspace with the help of a library.
-
-The tap can be installed either as an interposition domain in between
-a frontend and backend driver pair, or as a terminating backend, in
-which case it is responsible for serving all requests itself.
-
-There are two reasons that you might want to use the tap,
-corresponding to these configurations:
-
- 1. To examine or modify a stream of block requests while they are
- in-flight (e.g. to encrypt data, or add data-driven watchpoints)
-
- 2. To prototype a new backend driver, serving requests from the tap
- rather than passing them along to the XenLinux blkback driver.
- (e.g. to forward block requests to a remote host)
-
-
-Interface:
-----------
-
-At the moment, the tap interface is similar in spirit to that of the
-Linux netfilter. Requests are messages from a client (frontend)
-domain to a disk (backend) domain. Responses are messages travelling
-back, acknowledging the completion of a request. the library allows
-chains of functions to be attached to these events. In addition,
-hooks may be attached to handle control messages, which signify things
-like connections from new domains.
-
-At present the control messages especially expose a lot of the
-underlying driver interfaces. This may change in the future in order
-to simplify writing hooks.
-
-Here are the public interfaces:
-
-These allow hook functions to be chained:
-
- void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
- void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
- void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
-
-This allows a response to be injected, in the case where a request has
-been removed using BLKTAP_STOLEN.
-
- void blktap_inject_response(blkif_response_t *);
-
-These let you add file descriptors and handlers to the main poll loop:
-
- int blktap_attach_poll(int fd, short events, int (*func)(int));
- void blktap_detach_poll(int fd);
-
-This starts the main poll loop:
-
- int blktap_listen(void);
-
-Example:
---------
-
-blkimage.c uses an image on the local file system to serve requests to
-a domain. Here's what it looks like:
-
----[blkimg.c]---
-
-/* blkimg.c
- *
- * file-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
- image_init();
-
- blktap_register_ctrl_hook("image_control", image_control);
- blktap_register_request_hook("image_request", image_request);
- blktap_listen();
-
- return 0;
-}
-
-----------------
-
-All of the real work is in blkimglib.c, but this illustrates the
-actual tap interface well enough. image_control() will be called with
-all control messages. image_request() handles requests. As it reads
-from an on-disk image file, no requests are ever passed on to a
-backend, and so there will be no responses to process -- so there is
-nothing registered as a response hook.
-
-Other examples:
----------------
-
-Here is a list of other examples in the directory:
-
-Things that terminate a block request stream:
-
- blkimg - Use a image file/device to serve requests
- blkgnbd - Use a remote gnbd server to serve requests
- blkaio - Use libaio... (DOES NOT WORK)
-
-Things that don't:
-
- blkdump - Print in-flight requests.
- blkcow - Really inefficient copy-on-write disks using libdb to store
- writes.
-
-There are examples of plugging these things together, for instance
-blkcowgnbd is a read-only gnbd device with copy-on-write to a local
-file.
-
-TODO:
------
-
-- Make session tracking work. At the moment these generally just handle a
- single front-end client at a time.
-
-- Integrate with Xend. Need to cleanly pass a image identifier in the connect
- message.
-
-- Make an asynchronous file-io terminator. The libaio attempt is
- tragically stalled because mapped foreign pages make pfn_valid fail
- (they are VM_IO), and so cannot be passed to aio as targets. A
- better solution may be to tear the disk interfaces out of the real
- backend and expose them somehow.
-
-- Make CoW suck less.
-
-- Do something more along the lines of dynamic linking for the
- plugins, so thatthey don't all need a new main().
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/README.sept05
--- a/tools/blktap/README.sept05 Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-The blktap has been rewritten substantially based on the current
-blkback driver. I've removed passthrough support, as this is broken
-by the move to grant tables and the lack of transitive grants. A
-blktap VM is now only capable of terminating block requests in
-userspace.
-
-ublkback/ contains a _very_ initial cut at a user-level version of the block
-backend driver. It gives a working example of how the current tap
-interfaces are used, in particular w.r.t. the vbd directories in
-xenstore.
-
-parallax/ contains fairly recent parallax code. This does not run on
-the changed blktap interface, but should only be a couple of hours
-work to get going again.
-
-All of the tricky bits are done, but there is plenty of cleaning to
-do, and the top-level functionality is not here yet. At the moment,
-the daemon ignores the pdev requested by the tools and opens the file
-or device specified by TMP_IMAGE_FILE_NAME in ublkback.c.
-
-TODO:
-1. Fix to allow pdev in the store to specify the device to open.
-2. Add support (to tools as well) to mount arbitrary files...
- just write the filename to mount into the store, instead of pdev.
-3. Reeximine blkif refcounting, it is almost certainly broken at the moment.
- - creating a blkif should take a reference.
- - each inflight request should take a reference on dequeue in blktaplib
- - sending responses should drop refs.
- - blkif should be implicitly freed when refcounts fall to 0.
-4. Modify the parallax req/rsp code as per ublkback to use the new tap
- interfaces.
-5. Write a front end that allows parallax and normal mounts to coexist
-6. Allow blkback and blktap to run at the same time.
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blkdump.c
--- a/tools/blktap/blkdump.c Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-/* blkdump.c
- *
- * show a running trace of block requests as they fly by.
- *
- * (c) 2004 Andrew Warfield.
- */
-
-#include <stdio.h>
-#include "blktaplib.h"
-
-int request_print(blkif_request_t *req)
-{
- int i;
-
- if ( (req->operation == BLKIF_OP_READ) ||
- (req->operation == BLKIF_OP_WRITE) )
- {
- printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n",
- ID_TO_DOM(req->id), ID_TO_IDX(req->id),
- blkif_op_name[req->operation],
- req->nr_segments, req->handle,
- req->sector_number);
-
-
- for (i=0; i < req->nr_segments; i++) {
- printf(" (gref: 0x%8x start: %u stop: %u)\n",
- req->seg[i].gref,
- req->seg[i].first_sect,
- req->seg[i].last_sect);
- }
-
- } else {
- printf("Unknown request message type.\n");
- }
-
- return BLKTAP_PASS;
-}
-
-int response_print(blkif_response_t *rsp)
-{
- if ( (rsp->operation == BLKIF_OP_READ) ||
- (rsp->operation == BLKIF_OP_WRITE) )
- {
- printf("[%2u:%2u>%5s] (status: %d)\n",
- ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id),
- blkif_op_name[rsp->operation],
- rsp->status);
-
- } else {
- printf("Unknown request message type.\n");
- }
- return BLKTAP_PASS;
-}
-
-int main(int argc, char *argv[])
-{
- blktap_register_request_hook("request_print", request_print);
- blktap_register_response_hook("response_print", response_print);
- blktap_listen();
-
- return 0;
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blkif.c
--- a/tools/blktap/blkif.c Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,212 +0,0 @@
-/*
- * blkif.c
- *
- * The blkif interface for blktap. A blkif describes an in-use virtual disk.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <err.h>
-
-#include "blktaplib.h"
-
-#if 1
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-static blkif_t *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
- blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif != NULL) &&
- ((blkif->domid != domid) || (blkif->handle != handle)) )
- blkif = blkif->hash_next;
- return blkif;
-}
-
-blkif_t *alloc_blkif(domid_t domid)
-{
- blkif_t *blkif;
-
- blkif = (blkif_t *)malloc(sizeof(blkif_t));
- if (!blkif)
- return NULL;
-
- memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
-
- return blkif;
-}
-
-static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
-{
- new_blkif_hook = fn;
-}
-
-int blkif_init(blkif_t *blkif, long int handle, long int pdev,
- long int readonly)
-{
- domid_t domid;
- blkif_t **pblkif;
-
- if (blkif == NULL)
- return -EINVAL;
-
- domid = blkif->domid;
- blkif->handle = handle;
- blkif->pdev = pdev;
- blkif->readonly = readonly;
-
- /*
- * Call out to the new_blkif_hook. The tap application should define this,
- * and it should return having set blkif->ops
- *
- */
- if (new_blkif_hook == NULL)
- {
- warn("Probe detected a new blkif, but no new_blkif_hook!");
- return -1;
- }
- new_blkif_hook(blkif);
-
- /* Now wire it in. */
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( *pblkif != NULL )
- {
- if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
- {
- DPRINTF("Could not create blkif: already exists\n");
- return -1;
- }
- pblkif = &(*pblkif)->hash_next;
- }
- blkif->hash_next = NULL;
- *pblkif = blkif;
-
- return 0;
-}
-
-void free_blkif(blkif_t *blkif)
-{
- blkif_t **pblkif, *curs;
-
- pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
- while ( (curs = *pblkif) != NULL )
- {
- if ( blkif == curs )
- {
- *pblkif = curs->hash_next;
- }
- pblkif = &curs->hash_next;
- }
- free(blkif);
-}
-
-void blkif_register_request_hook(blkif_t *blkif, char *name,
- int (*rh)(blkif_t *, blkif_request_t *, int))
-{
- request_hook_t *rh_ent, **c;
-
- rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
- if (!rh_ent)
- {
- warn("couldn't allocate a new hook");
- return;
- }
-
- rh_ent->func = rh;
- rh_ent->next = NULL;
- if (asprintf(&rh_ent->name, "%s", name) == -1)
- {
- free(rh_ent);
- warn("couldn't allocate a new hook name");
- return;
- }
-
- c = &blkif->request_hook_chain;
- while (*c != NULL) {
- c = &(*c)->next;
- }
- *c = rh_ent;
-}
-
-void blkif_register_response_hook(blkif_t *blkif, char *name,
- int (*rh)(blkif_t *, blkif_response_t *, int))
-{
- response_hook_t *rh_ent, **c;
-
- rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
- if (!rh_ent)
- {
- warn("couldn't allocate a new hook");
- return;
- }
-
- rh_ent->func = rh;
- rh_ent->next = NULL;
- if (asprintf(&rh_ent->name, "%s", name) == -1)
- {
- free(rh_ent);
- warn("couldn't allocate a new hook name");
- return;
- }
-
- c = &blkif->response_hook_chain;
- while (*c != NULL) {
- c = &(*c)->next;
- }
- *c = rh_ent;
-}
-
-void blkif_print_hooks(blkif_t *blkif)
-{
- request_hook_t *req_hook;
- response_hook_t *rsp_hook;
-
- DPRINTF("Request Hooks:\n");
- req_hook = blkif->request_hook_chain;
- while (req_hook != NULL)
- {
- DPRINTF(" [0x%p] %s\n", req_hook->func, req_hook->name);
- req_hook = req_hook->next;
- }
-
- DPRINTF("Response Hooks:\n");
- rsp_hook = blkif->response_hook_chain;
- while (rsp_hook != NULL)
- {
- DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
- rsp_hook = rsp_hook->next;
- }
-}
-
-
-long int vbd_size(blkif_t *blkif)
-{
- return 1000000000;
-}
-
-long int vbd_secsize(blkif_t *blkif)
-{
- return 512;
-}
-
-unsigned vbd_info(blkif_t *blkif)
-{
- return 0;
-}
-
-
-void __init_blkif(void)
-{
- memset(blkif_hash, 0, sizeof(blkif_hash));
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blktaplib.c
--- a/tools/blktap/blktaplib.c Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,453 +0,0 @@
-/*
- * blktaplib.c
- *
- * userspace interface routines for the blktap driver.
- *
- * (threadsafe(r) version)
- *
- * (c) 2004 Andrew Warfield.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/user.h>
-#include <err.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <linux/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/ioctl.h>
-#include <string.h>
-#include <unistd.h>
-#include <pthread.h>
-#include <xs.h>
-
-#define __COMPILING_BLKTAP_LIB
-#include "blktaplib.h"
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-#define DEBUG_RING_IDXS 0
-
-#define POLLRDNORM 0x040
-
-#define BLKTAP_IOCTL_KICK 1
-
-
-void got_sig_bus();
-void got_sig_int();
-
-/* in kernel these are opposite, but we are a consumer now. */
-blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */
-blkif_front_ring_t be_ring;
-
-unsigned long mmap_vstart = 0;
-char *blktap_mem;
-int fd = 0;
-
-#define BLKTAP_RING_PAGES 1 /* Front */
-#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
-
-int bad_count = 0;
-void bad(void)
-{
- bad_count ++;
- if (bad_count > 50) exit(0);
-}
-/*-----[ ID Manipulation from tap driver code ]--------------------------*/
-
-#define ACTIVE_RING_IDX unsigned short
-
-inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
-{
- return ( (fe_dom << 16) | idx );
-}
-
-inline unsigned int ID_TO_IDX(unsigned long id)
-{
- return ( id & 0x0000ffff );
-}
-
-inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
-
-static int (*request_hook)(blkif_request_t *req) = NULL;
-static int (*response_hook)(blkif_response_t *req) = NULL;
-
-/*-----[ Data to/from Backend (server) VM ]------------------------------*/
-
-/*
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
- blkif_request_t *req_d;
- static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
- pthread_mutex_lock(&be_prod_mutex);
- req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
- memcpy(req_d, req, sizeof(blkif_request_t));
- wmb();
- be_ring.req_prod_pvt++;
- pthread_mutex_unlock(&be_prod_mutex);
-
- return 0;
-}
-*/
-
-inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
-{
- blkif_response_t *rsp_d;
- static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
- pthread_mutex_lock(&fe_prod_mutex);
- rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
- memcpy(rsp_d, rsp, sizeof(blkif_response_t));
- wmb();
- fe_ring.rsp_prod_pvt++;
- pthread_mutex_unlock(&fe_prod_mutex);
-
- return 0;
-}
-
-static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
-{
- response_hook_t *rsp_hook;
-
- rsp_hook = blkif->response_hook_chain;
- while (rsp_hook != NULL)
- {
- switch(rsp_hook->func(blkif, rsp, 1))
- {
- case BLKTAP_PASS:
- break;
- default:
- printf("Only PASS is supported for resp hooks!\n");
- }
- rsp_hook = rsp_hook->next;
- }
-}
-
-
-static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
-{
-
- apply_rsp_hooks(blkif, rsp);
-
- write_rsp_to_fe_ring(rsp);
-}
-
-void blktap_kick_responses(void)
-{
- pthread_mutex_lock(&push_mutex);
-
- RING_PUSH_RESPONSES(&fe_ring);
- ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-
- pthread_mutex_unlock(&push_mutex);
-}
-
-/*-----[ Polling fd listeners ]------------------------------------------*/
-
-#define MAX_POLLFDS 64
-
-typedef struct {
- int (*func)(int fd);
- struct pollfd *pfd;
- int fd;
- short events;
- int active;
-} pollhook_t;
-
-static struct pollfd pfd[MAX_POLLFDS+2]; /* tap and store are extra */
-static pollhook_t pollhooks[MAX_POLLFDS];
-static unsigned int ph_freelist[MAX_POLLFDS];
-static unsigned int ph_cons, ph_prod;
-#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
-#define PH_IDX(x) (x % MAX_POLLFDS)
-
-int blktap_attach_poll(int fd, short events, int (*func)(int fd))
-{
- pollhook_t *ph;
-
- if (nr_pollhooks() == MAX_POLLFDS) {
- printf("Too many pollhooks!\n");
- return -1;
- }
-
- ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
-
- ph->func = func;
- ph->fd = fd;
- ph->events = events;
- ph->active = 1;
-
- DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1,
- nr_pollhooks());
-
- return 0;
-}
-
-void blktap_detach_poll(int fd)
-{
- int i;
-
- for (i=0; i<MAX_POLLFDS; i++)
- if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
- ph_freelist[PH_IDX(ph_prod++)] = i;
- pollhooks[i].pfd->fd = -1;
- pollhooks[i].active = 0;
- break;
- }
-
- DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i,
- nr_pollhooks());
-}
-
-void pollhook_init(void)
-{
- int i;
-
- for (i=0; i < MAX_POLLFDS; i++) {
- ph_freelist[i] = (i+1) % MAX_POLLFDS;
- pollhooks[i].active = 0;
- }
-
- ph_cons = 0;
- ph_prod = MAX_POLLFDS;
-}
-
-void __attribute__ ((constructor)) blktaplib_init(void)
-{
- pollhook_init();
-}
-
-/*-----[ The main listen loop ]------------------------------------------*/
-
-int blktap_listen(void)
-{
- int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
- struct xs_handle *h;
- blkif_t *blkif;
-
- /* comms rings: */
- blkif_request_t *req;
- blkif_response_t *rsp;
- blkif_sring_t *sring;
- RING_IDX rp, i, pfd_count;
-
- /* pending rings */
- blkif_request_t req_pending[BLK_RING_SIZE];
- /* blkif_response_t rsp_pending[BLK_RING_SIZE] */;
-
- /* handler hooks: */
- request_hook_t *req_hook;
- response_hook_t *rsp_hook;
-
- signal (SIGBUS, got_sig_bus);
- signal (SIGINT, got_sig_int);
-
- __init_blkif();
-
- fd = open("/dev/blktap", O_RDWR);
- if (fd == -1)
- err(-1, "open failed!");
-
- blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE,
- PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
- if ((int)blktap_mem == -1)
- err(-1, "mmap failed!");
-
- /* assign the rings to the mapped memory */
-/*
- sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
- FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
-*/
- sring = (blkif_sring_t *)((unsigned long)blktap_mem);
- BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
-
- mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
-
-
- /* Set up store connection and watch. */
- h = xs_daemon_open();
- if (h == NULL)
- err(-1, "xs_daemon_open");
-
- ret = add_blockdevice_probe_watch(h, "Domain-0");
- if (ret != 0)
- err(0, "adding device probewatch");
-
- ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
-
- while(1) {
- int ret;
-
- /* build the poll list */
- pfd_count = 0;
- for ( i=0; i < MAX_POLLFDS; i++ ) {
- pollhook_t *ph = &pollhooks[i];
-
- if (ph->active) {
- pfd[pfd_count].fd = ph->fd;
- pfd[pfd_count].events = ph->events;
- ph->pfd = &pfd[pfd_count];
- pfd_count++;
- }
- }
-
- tap_pfd = pfd_count++;
- pfd[tap_pfd].fd = fd;
- pfd[tap_pfd].events = POLLIN;
-
- store_pfd = pfd_count++;
- pfd[store_pfd].fd = xs_fileno(h);
- pfd[store_pfd].events = POLLIN;
-
- if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
- if (DEBUG_RING_IDXS)
- ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
- continue;
- }
-
- for (i=0; i < MAX_POLLFDS; i++) {
- if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
- pollhooks[i].func(pollhooks[i].pfd->fd);
- }
-
- if (pfd[store_pfd].revents) {
- ret = xs_fire_next_watch(h);
- }
-
- if (pfd[tap_pfd].revents)
- {
- /* empty the fe_ring */
- notify_fe = 0;
- notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
- rp = fe_ring.sring->req_prod;
- rmb();
- for (i = fe_ring.req_cons; i != rp; i++)
- {
- int done = 0;
-
- req = RING_GET_REQUEST(&fe_ring, i);
- memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
- req = &req_pending[ID_TO_IDX(req->id)];
-
- blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
-
- if (blkif != NULL)
- {
- req_hook = blkif->request_hook_chain;
- while (req_hook != NULL)
- {
- switch(req_hook->func(blkif, req, ((i+1) == rp)))
- {
- case BLKTAP_RESPOND:
- apply_rsp_hooks(blkif, (blkif_response_t *)req);
- write_rsp_to_fe_ring((blkif_response_t *)req);
- notify_fe = 1;
- done = 1;
- break;
- case BLKTAP_STOLEN:
- done = 1;
- break;
- case BLKTAP_PASS:
- break;
- default:
- printf("Unknown request hook return value!\n");
- }
- if (done) break;
- req_hook = req_hook->next;
- }
- }
-
- if (done == 0)
- {
- /* this was: */
- /* write_req_to_be_ring(req); */
-
- unsigned long id = req->id;
- unsigned short operation = req->operation;
- printf("Unterminated request!\n");
- rsp = (blkif_response_t *)req;
- rsp->id = id;
- rsp->operation = operation;
- rsp->status = BLKIF_RSP_ERROR;
- write_rsp_to_fe_ring(rsp);
- notify_fe = 1;
- done = 1;
- }
-
- }
- fe_ring.req_cons = i;
-
- /* empty the be_ring */
-/*
- notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
- rp = be_ring.sring->rsp_prod;
- rmb();
- for (i = be_ring.rsp_cons; i != rp; i++)
- {
-
- rsp = RING_GET_RESPONSE(&be_ring, i);
- memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
- rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
-
- DPRINTF("copying a be request\n");
-
- apply_rsp_hooks(rsp);
- write_rsp_to_fe_ring(rsp);
- }
- be_ring.rsp_cons = i;
-*/
- /* notify the domains */
-/*
- if (notify_be) {
- DPRINTF("notifying be\n");
-pthread_mutex_lock(&push_mutex);
- RING_PUSH_REQUESTS(&be_ring);
- ioctl(fd, BLKTAP_IOCTL_KICK_BE);
-pthread_mutex_unlock(&push_mutex);
- }
-*/
- if (notify_fe) {
- DPRINTF("notifying fe\n");
- pthread_mutex_lock(&push_mutex);
- RING_PUSH_RESPONSES(&fe_ring);
- ioctl(fd, BLKTAP_IOCTL_KICK_FE);
- pthread_mutex_unlock(&push_mutex);
- }
- }
- }
-
-
- munmap(blktap_mem, PAGE_SIZE);
-
- mmap_failed:
- close(fd);
-
- open_failed:
- return 0;
-}
-
-void got_sig_bus() {
- printf("Attempted to access a page that isn't.\n");
- exit(-1);
-}
-
-void got_sig_int() {
- DPRINTF("quitting -- returning to passthrough mode.\n");
- if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
- close(fd);
- fd = 0;
- exit(0);
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blktaplib.h
--- a/tools/blktap/blktaplib.h Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,171 +0,0 @@
-/* blktaplib.h
- *
- * userland accessors to the block tap.
- *
- * Sept 2/05 -- I'm scaling this back to only support block remappings
- * to user in a backend domain. Passthrough and interposition can be readded
- * once transitive grants are available.
- */
-
-#ifndef __BLKTAPLIB_H__
-#define __BLKTAPLIB_H__
-
-#include <xenctrl.h>
-#include <sys/user.h>
-#include <xen/xen.h>
-#include <xen/io/blkif.h>
-#include <xen/io/ring.h>
-#include <xen/io/domain_controller.h>
-#include <xs.h>
-
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-
-/* /dev/xen/blktap resides at device number major=10, minor=202 */
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLK_RING_SIZE
-
-/* blktap IOCTLs: */
-#define BLKTAP_IOCTL_KICK_FE 1
-#define BLKTAP_IOCTL_KICK_BE 2
-#define BLKTAP_IOCTL_SETMODE 3
-#define BLKTAP_IOCTL_PRINT_IDXS 100
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
-#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
-#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
-#define BLKTAP_MODE_COPY_FE 0x00000004
-#define BLKTAP_MODE_COPY_BE 0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
- (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
- (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
- (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
- return (
- ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
- ( arg == BLKTAP_MODE_INTERPOSE ) );
-/*
- return (
- ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
- ( arg == BLKTAP_MODE_INTERPOSE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
- );
-*/
-}
-
-/* Return values for handling messages in hooks. */
-#define BLKTAP_PASS 0 /* Keep passing this request as normal. */
-#define BLKTAP_RESPOND 1 /* Request is now a reply. Return it. */
-#define BLKTAP_STOLEN 2 /* Hook has stolen request. */
-
-//#define domid_t unsigned short
-
-inline unsigned int ID_TO_IDX(unsigned long id);
-inline domid_t ID_TO_DOM(unsigned long id);
-
-int blktap_attach_poll(int fd, short events, int (*func)(int));
-void blktap_detach_poll(int fd);
-int blktap_listen(void);
-
-struct blkif;
-
-typedef struct request_hook_st {
- char *name;
- int (*func)(struct blkif *, blkif_request_t *, int);
- struct request_hook_st *next;
-} request_hook_t;
-
-typedef struct response_hook_st {
- char *name;
- int (*func)(struct blkif *, blkif_response_t *, int);
- struct response_hook_st *next;
-} response_hook_t;
-
-struct blkif_ops {
- long int (*get_size)(struct blkif *blkif);
- long int (*get_secsize)(struct blkif *blkif);
- unsigned (*get_info)(struct blkif *blkif);
-};
-
-typedef struct blkif {
- domid_t domid;
- long int handle;
-
- long int pdev;
- long int readonly;
-
- enum { DISCONNECTED, CONNECTED } state;
-
- struct blkif_ops *ops;
- request_hook_t *request_hook_chain;
- response_hook_t *response_hook_chain;
-
- struct blkif *hash_next;
-
- void *prv; /* device-specific data */
-} blkif_t;
-
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-blkif_t *alloc_blkif(domid_t domid);
-int blkif_init(blkif_t *blkif, long int handle, long int pdev,
- long int readonly);
-void free_blkif(blkif_t *blkif);
-void __init_blkif(void);
-
-
-/* xenstore/xenbus: */
-extern int add_blockdevice_probe_watch(struct xs_handle *h,
- const char *domname);
-int xs_fire_next_watch(struct xs_handle *h);
-
-
-void blkif_print_hooks(blkif_t *blkif);
-void blkif_register_request_hook(blkif_t *blkif, char *name,
- int (*rh)(blkif_t *, blkif_request_t *, int));
-void blkif_register_response_hook(blkif_t *blkif, char *name,
- int (*rh)(blkif_t *, blkif_response_t *, int));
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
-void blktap_kick_responses(void);
-
-/* this must match the underlying driver... */
-#define MAX_PENDING_REQS 64
-
-/* Accessing attached data page mappings */
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
-
-extern unsigned long mmap_vstart;
-
-/* Defines that are only used by library clients */
-
-#ifndef __COMPILING_BLKTAP_LIB
-
-static char *blkif_op_name[] = {
- [BLKIF_OP_READ] = "READ",
- [BLKIF_OP_WRITE] = "WRITE",
-};
-
-#endif /* __COMPILING_BLKTAP_LIB */
-
-#endif /* __BLKTAPLIB_H__ */
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/list.h
--- a/tools/blktap/list.h Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-/*
- * list.h
- *
- * This is a subset of linux's list.h intended to be used in user-space.
- *
- */
-
-#ifndef __LIST_H__
-#define __LIST_H__
-
-#define LIST_POISON1 ((void *) 0x00100100)
-#define LIST_POISON2 ((void *) 0x00200200)
-
-struct list_head {
- struct list_head *next, *prev;
-};
-
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define LIST_HEAD(name) \
- struct list_head name = LIST_HEAD_INIT(name)
-
-static inline void __list_add(struct list_head *new,
- struct list_head *prev,
- struct list_head *next)
-{
- next->prev = new;
- new->next = next;
- new->prev = prev;
- prev->next = new;
-}
-
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head, head->next);
-}
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
- next->prev = prev;
- prev->next = next;
-}
-static inline void list_del(struct list_head *entry)
-{
- __list_del(entry->prev, entry->next);
- entry->next = LIST_POISON1;
- entry->prev = LIST_POISON2;
-}
-#define list_entry(ptr, type, member) \
- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-#define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
-
-#endif /* __LIST_H__ */
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/Makefile
--- a/tools/blktap/parallax/Makefile Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-XEN_ROOT = ../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-PARALLAX_INSTALL_DIR = /usr/sbin
-
-INSTALL = install
-INSTALL_PROG = $(INSTALL) -m0755
-INSTALL_DIR = $(INSTALL) -d -m0755
-
-INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
-
-LDFLAGS = -L.. -lpthread -lz -lblktap
-
-#PLX_SRCS :=
-PLX_SRCS := vdi.c
-PLX_SRCS += radix.c
-PLX_SRCS += snaplog.c
-PLX_SRCS += blockstore.c
-PLX_SRCS += block-async.c
-PLX_SRCS += requests-async.c
-VDI_SRCS := $(PLX_SRCS)
-PLX_SRCS += parallax.c
-
-#VDI_TOOLS :=
-VDI_TOOLS := vdi_create
-VDI_TOOLS += vdi_list
-VDI_TOOLS += vdi_snap
-VDI_TOOLS += vdi_snap_list
-VDI_TOOLS += vdi_snap_delete
-VDI_TOOLS += vdi_fill
-VDI_TOOLS += vdi_tree
-VDI_TOOLS += vdi_validate
-
-CFLAGS += -Werror
-CFLAGS += -Wno-unused
-CFLAGS += -fno-strict-aliasing
-CFLAGS += $(INCLUDES)
-CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEPS = .*.d
-
-OBJS = $(patsubst %.c,%.o,$(SRCS))
-IBINS = parallax $(VDI_TOOLS)
-
-.PHONY: all
-all: $(VDI_TOOLS) parallax blockstored
-
-.PHONY: install
-install: all
- $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
-
-.PHONY: clean
-clean:
- rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest
-
-parallax: $(PLX_SRCS)
- $(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS)
-
-${VDI_TOOLS}: %: %.c $(VDI_SRCS)
- $(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS)
-
--include $(DEPS)
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/README
--- a/tools/blktap/parallax/README Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
-Parallax Quick Overview
-March 3, 2005
-
-This is intended to provide a quick set of instructions to let you
-guys play with the current parallax source. In it's current form, the
-code will let you run an arbitrary number of VMs off of a single disk
-image, doing copy-on-write as they make updates. Each domain is
-assigned a virtual disk image (VDI), which may be based on a snapshot
-of an existing image. All of the VDI and snapshot management should
-currently work.
-
-The current implementation uses a single file as a blockstore for
-_everything_ this will soon be replaced by the fancier backend code
-and the local cache. As it stands, Parallax will create
-"blockstore.dat" in the directory that you run it from, and use
-largefile support to make this grow to unfathomable girth. So, you
-probably want to run the daemon off of a local disk, with a lot of
-free space.
-
-Here's how to get going:
-
-0. Setup:
----------
-
-Pick a local directory on a disk with lots of room. You should be
-running from a privileged domain (e.g. dom0) with the blocktap
-configured in and block backend NOT.
-
-For convenience (for the moment) copy all of the vdi tools (vdi_*) and
-the parallax daemon from tools/blktap into this directory.
-
-1. Populate the blockstore:
----------------------------
-
-First you need to put at least one image into the blockstore. You
-will need a disk image, either as a file or local partition. My
-general approach has been to
-
-(a) make a really big sparse file with
-
- dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
-
-(b) put a filesystem into it
-
- mkfs.ext3 ./image
-
-(c) mount it using loopback
-
- mkdir ./mnt
- mount -o loop ./image
-
-(d) cd into it and untar one of the image files from srg-roots.
-
- cd mnt
- tar ...
-
-NOTE: Beware if your system is FC3. mkfs is not compatible with old
-versions of fedora, and so you don't have much choice but to install
-further fc3 images if you have used the fc3 version of mkfs.
-
-(e) unmount the image
-
- cd ..
- umount mnt
-
-(f) now, create a new VDI to hold the image
-
- ./vdi_create "My new FC3 VDI"
-
-(g) get the id of the new VDI.
-
- ./vdi_list
-
- | 0 My new FC3 VDI
-
-(0 is the VDI id... create a few more if you want.)
-
-(h) hoover your image into the new VDI.
-
- ./vdi_fill 0 ./image
-
-This will pull the entire image into the blockstore and set up a
-mapping tree for it for VDI 0. Passing a device (i.e. /dev/sda3)
-should also work, but vdi_fill has NO notion of sparseness yet, so you
-are going to pump a block into the store for each block you read.
-
-vdi_fill will count up until it is done, and you should be ready to
-go. If you want to be anal, you can use vdi_validate to test the VDI
-against the original image.
-
-2. Create some extra VDIs
--------------------------
-
-VDIs are actually a list of snapshots, and each snapshot is a full
-image of mappings. So, to preserve an immutable copy of a current
-VDI, do this:
-
-(a) Snapshot your new VDI.
-
- ./vdi_snap 0
-
-Snapshotting writes the current radix root to the VDI's snapshot log,
-and assigns it a new writable root.
-
-(b) look at the VDI's snapshot log.
-
- ./vdi_snap_list 0
-
- | 16 0 Thu Mar 3 19:27:48 2005 565111 31
-
-The first two columns constitute a snapshot id and represent the
-(block, offset) of the snapshot record. The Date tells you when the
-snapshot was made, and 31 is the radix root node of the snapshot.
-
-(c) Create a new VDI, based on that snapshot, and look at the list.
-
- ./vdi_create "FC3 - Copy 1" 16 0
- ./vdi_list
-
- | 0 My new FC3 VDI
- | 1 FC3 - Copy 1
-
-NOTE: If you have Graphviz installed on your system, you can use
-vdi_tree to generate a postscript of your current set of VDIs and
-snapshots.
-
-
-Create as many VDIs as you need for the VMs that you want to run.
-
-3. Boot some VMs:
------------------
-
-Parallax currently uses a hack in xend to pass the VDI id, you need to
-modify the disk line of the VM config that is going to mount it.
-
-(a) set up your vm config, by using the following disk line:
-
- disk = ['parallax:1,sda1,w,0' ]
-
-This example uses VDI 1 (from vdi_list above), presents it as sda1
-(writable), and uses dom 0 as the backend. If you were running the
-daemon (and tap driver) in some domain other than 0, you would change
-this last parameter.
-
-NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:".
-
-(b) Run parallax in the backend domain.
-
- ./parallax
-
-(c) create your new domain.
-
- xm create ...
-
----
-
-That's pretty much all there is to it at the moment. Hope this is
-clear enough to get you going. Now, a few serious caveats that will
-be sorted out in the almost immediate future:
-
-WARNINGS:
----------
-
-1. There is NO locking in the VDI tools at the moment, so I'd avoid
-running them in parallel, or more importantly, running them while the
-daemon is running.
-
-2. I doubt that xend will be very happy about restarting if you have
-parallax-using domains. So if it dies while there are active parallax
-doms, you may need to reboot.
-
-3. I've turned off write-in-place. So at the moment, EVERY block
-write is a log append on the blockstore. I've been having some probs
-with the radix tree's marking of writable blocks after snapshots and
-will sort this out very soon.
-
-
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/block-async.c
--- a/tools/blktap/parallax/block-async.c Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,393 +0,0 @@
-/* block-async.c
- *
- * Asynchronous block wrappers for parallax.
- */
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "block-async.h"
-#include "blockstore.h"
-#include "vdi.h"
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* We have a queue of outstanding I/O requests implemented as a
- * circular producer-consumer ring with free-running buffers.
- * to allow reordering, this ring indirects to indexes in an
- * ring of io_structs.
- *
- * the block_* calls may either add an entry to this ring and return,
- * or satisfy the request immediately and call the callback directly.
- * None of the io calls in parallax should be nested enough to worry
- * about stack problems with this approach.
- */
-
-struct read_args {
- uint64_t addr;
-};
-
-struct write_args {
- uint64_t addr;
- char *block;
-};
-
-struct alloc_args {
- char *block;
-};
-
-struct pending_io_req {
- enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
- union {
- struct read_args r;
- struct write_args w;
- struct alloc_args a;
- } u;
- io_cb_t cb;
- void *param;
-};
-
-void radix_lock_init(struct radix_lock *r)
-{
- int i;
-
- pthread_mutex_init(&r->lock, NULL);
- for (i=0; i < 1024; i++) {
- r->lines[i] = 0;
- r->waiters[i] = NULL;
- r->state[i] = ANY;
- }
-}
-
-/* maximum outstanding I/O requests issued asynchronously */
-/* must be a power of 2.*/
-#define MAX_PENDING_IO 1024
-
-/* how many threads to concurrently issue I/O to the disk. */
-#define IO_POOL_SIZE 10
-
-static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
-static int pending_io_list[MAX_PENDING_IO];
-static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
-#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
-#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
-#define PENDING_IO_ENT(_x) \
- (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
-#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
-#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
-static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t pending_io_cond = PTHREAD_COND_INITIALIZER;
-
-static void init_pending_io(void)
-{
- int i;
-
- for (i=0; i<MAX_PENDING_IO; i++)
- pending_io_list[i] = i;
-
-}
-
-void block_read(uint64_t addr, io_cb_t cb, void *param)
-{
- struct pending_io_req *req;
-
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
- req->op = IO_READ;
- req->u.r.addr = addr;
- req->cb = cb;
- req->param = param;
-
- pthread_cond_signal(&pending_io_cond);
- pthread_mutex_unlock(&pending_io_lock);
-}
-
-
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param)
-{
- struct pending_io_req *req;
-
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
- req->op = IO_WRITE;
- req->u.w.addr = addr;
- req->u.w.block = block;
- req->cb = cb;
- req->param = param;
-
- pthread_cond_signal(&pending_io_cond);
- pthread_mutex_unlock(&pending_io_lock);
-}
-
-
-void block_alloc(char *block, io_cb_t cb, void *param)
-{
- struct pending_io_req *req;
-
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- req->op = IO_ALLOC;
- req->u.a.block = block;
- req->cb = cb;
- req->param = param;
-
- pthread_cond_signal(&pending_io_cond);
- pthread_mutex_unlock(&pending_io_lock);
-}
-
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
- struct io_ret ret;
- pthread_mutex_lock(&r->lock);
-
- if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
- r->lines[row]++;
- r->state[row] = READ;
- DPRINTF("RLOCK : %3d (row: %d)\n", r->lines[row], row);
- pthread_mutex_unlock(&r->lock);
- ret.type = IO_INT_T;
- ret.u.i = 0;
- cb(ret, param);
- } else {
- struct radix_wait **rwc;
- struct radix_wait *rw =
- (struct radix_wait *) malloc (sizeof(struct radix_wait));
- DPRINTF("RLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
- rw->type = RLOCK;
- rw->param = param;
- rw->cb = cb;
- rw->next = NULL;
- /* append to waiters list. */
- rwc = &r->waiters[row];
- while (*rwc != NULL) rwc = &(*rwc)->next;
- *rwc = rw;
- pthread_mutex_unlock(&r->lock);
- return;
- }
-}
-
-
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
- struct io_ret ret;
- pthread_mutex_lock(&r->lock);
-
- /* the second check here is redundant -- just here for debugging now. */
- if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
- r->state[row] = STOP;
- r->lines[row] = -1;
- DPRINTF("WLOCK : %3d (row: %d)\n", r->lines[row], row);
- pthread_mutex_unlock(&r->lock);
- ret.type = IO_INT_T;
- ret.u.i = 0;
- cb(ret, param);
- } else {
- struct radix_wait **rwc;
- struct radix_wait *rw =
- (struct radix_wait *) malloc (sizeof(struct radix_wait));
- DPRINTF("WLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
- rw->type = WLOCK;
- rw->param = param;
- rw->cb = cb;
- rw->next = NULL;
- /* append to waiters list. */
- rwc = &r->waiters[row];
- while (*rwc != NULL) rwc = &(*rwc)->next;
- *rwc = rw;
- pthread_mutex_unlock(&r->lock);
- return;
- }
-
-}
-
-/* called with radix_lock locked and lock count of zero. */
-static void wake_waiters(struct radix_lock *r, int row)
-{
- struct pending_io_req *req;
- struct radix_wait *rw;
-
- if (r->lines[row] != 0) return;
- if (r->waiters[row] == NULL) return;
-
- if (r->waiters[row]->type == WLOCK) {
-
- rw = r->waiters[row];
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- req->op = IO_WWAKE;
- req->cb = rw->cb;
- req->param = rw->param;
- r->lines[row] = -1; /* write lock the row. */
- r->state[row] = STOP;
- r->waiters[row] = rw->next;
- free(rw);
- pthread_mutex_unlock(&pending_io_lock);
-
- } else /* RLOCK */ {
-
- while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
- rw = r->waiters[row];
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- req->op = IO_RWAKE;
- req->cb = rw->cb;
- req->param = rw->param;
- r->lines[row]++; /* read lock the row. */
- r->state[row] = READ;
- r->waiters[row] = rw->next;
- free(rw);
- pthread_mutex_unlock(&pending_io_lock);
- }
-
- if (r->waiters[row] != NULL) /* There is a write queued still */
- r->state[row] = STOP;
- }
-
- pthread_mutex_lock(&pending_io_lock);
- pthread_cond_signal(&pending_io_cond);
- pthread_mutex_unlock(&pending_io_lock);
-}
-
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
- struct io_ret ret;
-
- pthread_mutex_lock(&r->lock);
- assert(r->lines[row] > 0); /* try to catch misuse. */
- r->lines[row]--;
- if (r->lines[row] == 0) {
- r->state[row] = ANY;
- wake_waiters(r, row);
- }
- pthread_mutex_unlock(&r->lock);
- cb(ret, param);
-}
-
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
- struct io_ret ret;
-
- pthread_mutex_lock(&r->lock);
- assert(r->lines[row] == -1); /* try to catch misuse. */
- r->lines[row] = 0;
- r->state[row] = ANY;
- wake_waiters(r, row);
- pthread_mutex_unlock(&r->lock);
- cb(ret, param);
-}
-
-/* consumer calls */
-static void do_next_io_req(struct pending_io_req *req)
-{
- struct io_ret ret;
- void *param;
-
- switch (req->op) {
- case IO_READ:
- ret.type = IO_BLOCK_T;
- ret.u.b = readblock(req->u.r.addr);
- break;
- case IO_WRITE:
- ret.type = IO_INT_T;
- ret.u.i = writeblock(req->u.w.addr, req->u.w.block);
- DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
- break;
- case IO_ALLOC:
- ret.type = IO_ADDR_T;
- ret.u.a = allocblock(req->u.a.block);
- break;
- case IO_RWAKE:
- DPRINTF("WAKE DEFERRED RLOCK!\n");
- ret.type = IO_INT_T;
- ret.u.i = 0;
- break;
- case IO_WWAKE:
- DPRINTF("WAKE DEFERRED WLOCK!\n");
- ret.type = IO_INT_T;
- ret.u.i = 0;
- break;
- default:
- DPRINTF("Unknown IO operation on pending list!\n");
- return;
- }
-
- param = req->param;
- pthread_mutex_lock(&pending_io_lock);
- pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
- pthread_mutex_unlock(&pending_io_lock);
-
- assert(req->cb != NULL);
- req->cb(ret, param);
-
-}
-
-void *io_thread(void *param)
-{
- int tid;
- struct pending_io_req *req;
-
- /* Set this thread's tid. */
- tid = *(int *)param;
- free(param);
-
-start:
- pthread_mutex_lock(&pending_io_lock);
- while (io_prod == io_cons) {
- pthread_cond_wait(&pending_io_cond, &pending_io_lock);
- }
-
- if (io_prod == io_cons) {
- /* unnecessary wakeup. */
- pthread_mutex_unlock(&pending_io_lock);
- goto start;
- }
-
- req = PENDING_IO_ENT(io_cons++);
- pthread_mutex_unlock(&pending_io_lock);
-
- do_next_io_req(req);
-
- goto start;
-
-}
-
-static pthread_t io_pool[IO_POOL_SIZE];
-void start_io_threads(void)
-
-{
- int i, tid=0;
-
- for (i=0; i < IO_POOL_SIZE; i++) {
- int ret, *t;
- t = (int *)malloc(sizeof(int));
- *t = tid++;
- ret = pthread_create(&io_pool[i], NULL, io_thread, t);
- if (ret != 0) printf("Error starting thread %d\n", i);
- }
-
-}
-
-void init_block_async(void)
-{
- init_pending_io();
- start_io_threads();
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/block-async.h
--- a/tools/blktap/parallax/block-async.h Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-/* block-async.h
- *
- * Asynchronous block wrappers for parallax.
- */
-
-#ifndef _BLOCKASYNC_H_
-#define _BLOCKASYNC_H_
-
-#include <assert.h>
-#include <xenctrl.h>
-#include "vdi.h"
-
-struct io_ret
-{
- enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
- union {
- uint64_t a;
- char *b;
- int i;
- } u;
-};
-
-typedef void (*io_cb_t)(struct io_ret r, void *param);
-
-/* per-vdi lock structures to make sure requests run in a safe order. */
-struct radix_wait {
- enum {RLOCK, WLOCK} type;
- io_cb_t cb;
- void *param;
- struct radix_wait *next;
-};
-
-struct radix_lock {
- pthread_mutex_t lock;
- int lines[1024];
- struct radix_wait *waiters[1024];
- enum {ANY, READ, STOP} state[1024];
-};
-void radix_lock_init(struct radix_lock *r);
-
-void block_read(uint64_t addr, io_cb_t cb, void *param);
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param);
-void block_alloc(char *block, io_cb_t cb, void *param);
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void init_block_async(void);
-
-static inline uint64_t IO_ADDR(struct io_ret r)
-{
- assert(r.type == IO_ADDR_T);
- return r.u.a;
-}
-
-static inline char *IO_BLOCK(struct io_ret r)
-{
- assert(r.type == IO_BLOCK_T);
- return r.u.b;
-}
-
-static inline int IO_INT(struct io_ret r)
-{
- assert(r.type == IO_INT_T);
- return r.u.i;
-}
-
-
-#endif //_BLOCKASYNC_H_
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/blockstore.c
--- a/tools/blktap/parallax/blockstore.c Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1348 +0,0 @@
-/**************************************************************************
- *
- * blockstore.c
- *
- * Simple block store interface
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <stdarg.h>
-#include "blockstore.h"
-#include <pthread.h>
-
-//#define BLOCKSTORE_REMOTE
-//#define BSDEBUG
-
-#define RETRY_TIMEOUT 1000000 /* microseconds */
-
-/*****************************************************************************
- * Debugging
- */
-#ifdef BSDEBUG
-void DB(char *format, ...)
-{
- va_list args;
- fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key));
- va_start(args, format);
- vfprintf(stderr, format, args);
- va_end(args);
-}
-#else
-#define DB(format, ...) (void)0
-#endif
-
-#ifdef BLOCKSTORE_REMOTE
-
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-
-/*****************************************************************************
- * Network state *
- *****************************************************************************/
-
-/* The individual disk servers we talks to. These will be referenced by
- * an integer index into bsservers[].
- */
-bsserver_t bsservers[MAX_SERVERS];
-
-/* The cluster map. This is indexed by an integer cluster number.
- */
-bscluster_t bsclusters[MAX_CLUSTERS];
-
-/* Local socket.
- */
-struct sockaddr_in sin_local;
-int bssock = 0;
-
-/*****************************************************************************
- * Notification *
- *****************************************************************************/
-
-typedef struct pool_thread_t_struct {
- pthread_mutex_t ptmutex;
- pthread_cond_t ptcv;
- int newdata;
-} pool_thread_t;
-
-pool_thread_t pool_thread[READ_POOL_SIZE+1];
-
-#define RECV_NOTIFY(tid) { \
- pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
- pool_thread[tid].newdata = 1; \
- DB("CV Waking %u", tid); \
- pthread_cond_signal(&(pool_thread[tid].ptcv)); \
- pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-#define RECV_AWAIT(tid) { \
- pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
- if (pool_thread[tid].newdata) { \
- pool_thread[tid].newdata = 0; \
- DB("CV Woken %u", tid); \
- } \
- else { \
- DB("CV Waiting %u", tid); \
- pthread_cond_wait(&(pool_thread[tid].ptcv), \
- &(pool_thread[tid].ptmutex)); \
- } \
- pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-
-/*****************************************************************************
- * Message queue management *
- *****************************************************************************/
-
-/* Protects the queue manipulation critcal regions.
- */
-pthread_mutex_t ptmutex_queue;
-#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue)
-#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue)
-
-pthread_mutex_t ptmutex_recv;
-#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv)
-#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv)
-
-/* A message queue entry. We allocate one of these for every request we send.
- * Asynchronous reply reception also used one of these.
- */
-typedef struct bsq_t_struct {
- struct bsq_t_struct *prev;
- struct bsq_t_struct *next;
- int status;
- int server;
- int length;
- struct msghdr msghdr;
- struct iovec iov[2];
- int tid;
- struct timeval tv_sent;
- bshdr_t message;
- void *block;
-} bsq_t;
-
-#define BSQ_STATUS_MATCHED 1
-
-pthread_mutex_t ptmutex_luid;
-#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid)
-#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid)
-
-static uint64_t luid_cnt = 0x1000ULL;
-uint64_t new_luid(void) {
- uint64_t luid;
- ENTER_LUID_CR;
- luid = luid_cnt++;
- LEAVE_LUID_CR;
- return luid;
-}
-
-/* Queue of outstanding requests.
- */
-bsq_t *bs_head = NULL;
-bsq_t *bs_tail = NULL;
-int bs_qlen = 0;
-
-/*
- */
-void queuedebug(char *msg) {
- bsq_t *q;
- ENTER_QUEUE_CR;
- fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen);
- for (q = bs_head; q; q = q->next) {
- fprintf(stderr, " luid=%016llx server=%u\n",
- q->message.luid, q->server);
- }
- LEAVE_QUEUE_CR;
-}
-
-int enqueue(bsq_t *qe) {
- ENTER_QUEUE_CR;
- qe->next = NULL;
- qe->prev = bs_tail;
- if (!bs_head)
- bs_head = qe;
- else
- bs_tail->next = qe;
- bs_tail = qe;
- bs_qlen++;
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("enqueue");
-#endif
- return 0;
-}
-
-int dequeue(bsq_t *qe) {
- bsq_t *q;
- ENTER_QUEUE_CR;
- for (q = bs_head; q; q = q->next) {
- if (q == qe) {
- if (q->prev)
- q->prev->next = q->next;
- else
- bs_head = q->next;
- if (q->next)
- q->next->prev = q->prev;
- else
- bs_tail = q->prev;
- bs_qlen--;
- goto found;
- }
- }
-
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("dequeue not found");
-#endif
- return 0;
-
- found:
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("dequeue not found");
-#endif
- return 1;
-}
-
-bsq_t *queuesearch(bsq_t *qe) {
- bsq_t *q;
- ENTER_QUEUE_CR;
- for (q = bs_head; q; q = q->next) {
- if ((qe->server == q->server) &&
- (qe->message.operation == q->message.operation) &&
- (qe->message.luid == q->message.luid)) {
-
- if ((q->message.operation == BSOP_READBLOCK) &&
- ((q->message.flags & BSOP_FLAG_ERROR) == 0)) {
- q->block = qe->block;
- qe->block = NULL;
- }
- q->length = qe->length;
- q->message.flags = qe->message.flags;
- q->message.id = qe->message.id;
- q->status |= BSQ_STATUS_MATCHED;
-
- if (q->prev)
- q->prev->next = q->next;
- else
- bs_head = q->next;
- if (q->next)
- q->next->prev = q->prev;
- else
- bs_tail = q->prev;
- q->next = NULL;
- q->prev = NULL;
- bs_qlen--;
- goto found;
- }
- }
-
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("queuesearch not found");
-#endif
- return NULL;
-
- found:
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("queuesearch found");
-#endif
- return q;
-}
-
-/*****************************************************************************
- * Network communication *
- *****************************************************************************/
-
-int send_message(bsq_t *qe) {
- int rc;
-
- qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
- qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
- qe->msghdr.msg_iov = qe->iov;
- if (qe->block)
- qe->msghdr.msg_iovlen = 2;
- else
- qe->msghdr.msg_iovlen = 1;
- qe->msghdr.msg_control = NULL;
- qe->msghdr.msg_controllen = 0;
- qe->msghdr.msg_flags = 0;
-
- qe->iov[0].iov_base = (void *)&(qe->message);
- qe->iov[0].iov_len = MSGBUFSIZE_ID;
-
- if (qe->block) {
- qe->iov[1].iov_base = qe->block;
- qe->iov[1].iov_len = BLOCK_SIZE;
- }
-
- qe->message.luid = new_luid();
-
- qe->status = 0;
- qe->tid = (int)pthread_getspecific(tid_key);
- if (enqueue(qe) < 0) {
- fprintf(stderr, "Error enqueuing request.\n");
- return -1;
- }
-
- gettimeofday(&(qe->tv_sent), NULL);
- DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid);
- rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
- //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0,
- // (struct sockaddr *)&(bsservers[qe->server].sin),
- // sizeof(struct sockaddr_in));
- if (rc < 0)
- return rc;
-
- return rc;
-}
-
-int recv_message(bsq_t *qe) {
- struct sockaddr_in from;
- //int flen = sizeof(from);
- int rc;
-
- qe->msghdr.msg_name = &from;
- qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
- qe->msghdr.msg_iov = qe->iov;
- if (qe->block)
- qe->msghdr.msg_iovlen = 2;
- else
- qe->msghdr.msg_iovlen = 1;
- qe->msghdr.msg_control = NULL;
- qe->msghdr.msg_controllen = 0;
- qe->msghdr.msg_flags = 0;
-
- qe->iov[0].iov_base = (void *)&(qe->message);
- qe->iov[0].iov_len = MSGBUFSIZE_ID;
- if (qe->block) {
- qe->iov[1].iov_base = qe->block;
- qe->iov[1].iov_len = BLOCK_SIZE;
- }
-
- rc = recvmsg(bssock, &(qe->msghdr), 0);
-
- //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0,
- // (struct sockaddr *)&from, &flen);
- return rc;
-}
-
-int get_server_number(struct sockaddr_in *sin) {
- int i;
-
-#ifdef BSDEBUG2
- fprintf(stderr,
- "get_server_number(%u.%u.%u.%u/%u)\n",
- (unsigned int)sin->sin_addr.s_addr & 0xff,
- ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff,
- ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff,
- ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff,
- (unsigned int)sin->sin_port);
-#endif
-
- for (i = 0; i < MAX_SERVERS; i++) {
- if (bsservers[i].hostname) {
-#ifdef BSDEBUG2
- fprintf(stderr,
- "get_server_number check %u.%u.%u.%u/%u\n",
- (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff,
- ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff,
- ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff,
- ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff,
- (unsigned int)bsservers[i].sin.sin_port);
-#endif
- if ((sin->sin_family == bsservers[i].sin.sin_family) &&
- (sin->sin_port == bsservers[i].sin.sin_port) &&
- (memcmp((void *)&(sin->sin_addr),
- (void *)&(bsservers[i].sin.sin_addr),
- sizeof(struct in_addr)) == 0)) {
- return i;
- }
- }
- }
-
- return -1;
-}
-
-void *rx_buffer = NULL;
-bsq_t rx_qe;
-bsq_t *recv_any(void) {
- struct sockaddr_in from;
- int rc;
-
- DB("ENTER recv_any\n");
-
- rx_qe.msghdr.msg_name = &from;
- rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
- rx_qe.msghdr.msg_iov = rx_qe.iov;
- if (!rx_buffer) {
- rx_buffer = malloc(BLOCK_SIZE);
- if (!rx_buffer) {
- perror("recv_any malloc");
- return NULL;
- }
- }
- rx_qe.block = rx_buffer;
- rx_buffer = NULL;
- rx_qe.msghdr.msg_iovlen = 2;
- rx_qe.msghdr.msg_control = NULL;
- rx_qe.msghdr.msg_controllen = 0;
- rx_qe.msghdr.msg_flags = 0;
-
- rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
- rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
- rx_qe.iov[1].iov_base = rx_qe.block;
- rx_qe.iov[1].iov_len = BLOCK_SIZE;
-
- rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
- if (rc < 0) {
- perror("recv_any");
- return NULL;
- }
-
- rx_qe.length = rc;
- rx_qe.server = get_server_number(&from);
-
- DB("recv_any from %d luid=%016llx len=%u\n",
- rx_qe.server, rx_qe.message.luid, rx_qe.length);
-
- return &rx_qe;
-}
-
-void recv_recycle_buffer(bsq_t *q) {
- if (q->block) {
- rx_buffer = q->block;
- q->block = NULL;
- }
-}
-
-// cycle through reading any incoming, searching for a match in the
-// queue, until we have all we need.
-int wait_recv(bsq_t **reqs, int numreqs) {
- bsq_t *q, *m;
- unsigned int x, i;
- int tid = (int)pthread_getspecific(tid_key);
-
- DB("ENTER wait_recv %u\n", numreqs);
-
- checkmatch:
- x = 0xffffffff;
- for (i = 0; i < numreqs; i++) {
- x &= reqs[i]->status;
- }
- if ((x & BSQ_STATUS_MATCHED)) {
- DB("LEAVE wait_recv\n");
- return numreqs;
- }
-
- RECV_AWAIT(tid);
-
- /*
- rxagain:
- ENTER_RECV_CR;
- q = recv_any();
- LEAVE_RECV_CR;
- if (!q)
- return -1;
-
- m = queuesearch(q);
- recv_recycle_buffer(q);
- if (!m) {
- fprintf(stderr, "Unmatched RX\n");
- goto rxagain;
- }
- */
-
- goto checkmatch;
-
-}
-
-/* retry
- */
-static int retry_count = 0;
-int retry(bsq_t *qe)
-{
- int rc;
- gettimeofday(&(qe->tv_sent), NULL);
- DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid);
- retry_count++;
- rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
- if (rc < 0)
- return rc;
- return 0;
-}
-
-/* queue runner
- */
-void *queue_runner(void *arg)
-{
- for (;;) {
- struct timeval now;
- long long nowus, sus;
- bsq_t *q;
- int r;
-
- sleep(1);
-
- gettimeofday(&now, NULL);
- nowus = now.tv_usec + now.tv_sec * 1000000;
- ENTER_QUEUE_CR;
- r = retry_count;
- for (q = bs_head; q; q = q->next) {
- sus = q->tv_sent.tv_usec + q->tv_sent.tv_sec * 1000000;
- if ((nowus - sus) > RETRY_TIMEOUT) {
- if (retry(q) < 0) {
- fprintf(stderr, "Error on sendmsg retry.\n");
- }
- }
- }
- if (r != retry_count) {
- fprintf(stderr, "RETRIES: %u %u\n", retry_count - r, retry_count);
- }
- LEAVE_QUEUE_CR;
- }
-}
-
-/* receive loop
- */
-void *receive_loop(void *arg)
-{
- bsq_t *q, *m;
-
- for(;;) {
- q = recv_any();
- if (!q) {
- fprintf(stderr, "recv_any error\n");
- }
- else {
- m = queuesearch(q);
- recv_recycle_buffer(q);
- if (!m) {
- fprintf(stderr, "Unmatched RX\n");
- }
- else {
- DB("RX MATCH");
- RECV_NOTIFY(m->tid);
- }
- }
- }
-}
-pthread_t pthread_recv;
-
-/*****************************************************************************
- * Reading *
- *****************************************************************************/
-
-void *readblock_indiv(int server, uint64_t id) {
- void *block;
- bsq_t *qe;
- int len, rc;
-
- qe = (bsq_t *)malloc(sizeof(bsq_t));
- if (!qe) {
- perror("readblock qe malloc");
- return NULL;
- }
- qe->block = NULL;
-
- /*
- qe->block = malloc(BLOCK_SIZE);
- if (!qe->block) {
- perror("readblock qe malloc");
- free((void *)qe);
- return NULL;
- }
- */
-
- qe->server = server;
-
- qe->message.operation = BSOP_READBLOCK;
- qe->message.flags = 0;
- qe->message.id = id;
- qe->length = MSGBUFSIZE_ID;
-
- if (send_message(qe) < 0) {
- perror("readblock sendto");
- goto err;
- }
-
- /*len = recv_message(qe);
- if (len < 0) {
- perror("readblock recv");
- goto err;
- }*/
-
- rc = wait_recv(&qe, 1);
- if (rc < 0) {
- perror("readblock recv");
- goto err;
- }
-
- if ((qe->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "readblock server error\n");
- goto err;
- }
- if (qe->length < MSGBUFSIZE_BLOCK) {
- fprintf(stderr, "readblock recv short (%u)\n", len);
- goto err;
- }
- /* if ((block = malloc(BLOCK_SIZE)) == NULL) {
- perror("readblock malloc");
- goto err;
- }
- memcpy(block, qe->message.block, BLOCK_SIZE);
- */
- block = qe->block;
-
- free((void *)qe);
- return block;
-
- err:
- free(qe->block);
- free((void *)qe);
- return NULL;
-}
-
-/**
- * readblock: read a block from disk
- * @id: block id to read
- *
- * @return: pointer to block, NULL on error
- */
-void *readblock(uint64_t id) {
- int map = (int)BSID_MAP(id);
- uint64_t xid;
- static int i = CLUSTER_MAX_REPLICAS - 1;
- void *block = NULL;
-
- /* special case for the "superblock" just use the first block on the
- * first replica. (extend to blocks < 6 for vdi bug)
- */
- if (id < 6) {
- block = readblock_indiv(bsclusters[map].servers[0], id);
- goto out;
- }
-
- i++;
- if (i >= CLUSTER_MAX_REPLICAS)
- i = 0;
- switch (i) {
- case 0:
- xid = BSID_REPLICA0(id);
- break;
- case 1:
- xid = BSID_REPLICA1(id);
- break;
- case 2:
- xid = BSID_REPLICA2(id);
- break;
- }
-
- block = readblock_indiv(bsclusters[map].servers[i], xid);
-
- out:
-#ifdef BSDEBUG
- if (block)
- fprintf(stderr, "READ: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
- id,
- (unsigned int)((unsigned char *)block)[0],
- (unsigned int)((unsigned char *)block)[1],
- (unsigned int)((unsigned char *)block)[2],
- (unsigned int)((unsigned char *)block)[3],
- (unsigned int)((unsigned char *)block)[4],
- (unsigned int)((unsigned char *)block)[5],
- (unsigned int)((unsigned char *)block)[6],
- (unsigned int)((unsigned char *)block)[7]);
- else
- fprintf(stderr, "READ: %016llx NULL\n", id);
-#endif
- return block;
-}
-
-/*****************************************************************************
- * Writing *
- *****************************************************************************/
-
-bsq_t *writeblock_indiv(int server, uint64_t id, void *block) {
-
- bsq_t *qe;
- int len;
-
- qe = (bsq_t *)malloc(sizeof(bsq_t));
- if (!qe) {
- perror("writeblock qe malloc");
- goto err;
- }
- qe->server = server;
-
- qe->message.operation = BSOP_WRITEBLOCK;
- qe->message.flags = 0;
- qe->message.id = id;
- //memcpy(qe->message.block, block, BLOCK_SIZE);
- qe->block = block;
- qe->length = MSGBUFSIZE_BLOCK;
-
- if (send_message(qe) < 0) {
- perror("writeblock sendto");
- goto err;
- }
-
- return qe;
-
- err:
- free((void *)qe);
- return NULL;
-}
-
-
-/**
- * writeblock: write an existing block to disk
- * @id: block id
- * @block: pointer to block
- *
- * @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-
- int map = (int)BSID_MAP(id);
- int rep0 = bsclusters[map].servers[0];
- int rep1 = bsclusters[map].servers[1];
- int rep2 = bsclusters[map].servers[2];
- bsq_t *reqs[3];
- int rc;
-
- reqs[0] = reqs[1] = reqs[2] = NULL;
-
-#ifdef BSDEBUG
- fprintf(stderr,
- "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
- id,
- (unsigned int)((unsigned char *)block)[0],
- (unsigned int)((unsigned char *)block)[1],
- (unsigned int)((unsigned char *)block)[2],
- (unsigned int)((unsigned char *)block)[3],
- (unsigned int)((unsigned char *)block)[4],
- (unsigned int)((unsigned char *)block)[5],
- (unsigned int)((unsigned char *)block)[6],
- (unsigned int)((unsigned char *)block)[7]);
-#endif
-
- /* special case for the "superblock" just use the first block on the
- * first replica. (extend to blocks < 6 for vdi bug)
- */
- if (id < 6) {
- reqs[0] = writeblock_indiv(rep0, id, block);
- if (!reqs[0])
- return -1;
- rc = wait_recv(reqs, 1);
- return rc;
- }
-
- reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block);
- if (!reqs[0])
- goto err;
- reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block);
- if (!reqs[1])
- goto err;
- reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block);
- if (!reqs[2])
- goto err;
-
- rc = wait_recv(reqs, 3);
- if (rc < 0) {
- perror("writeblock recv");
- goto err;
- }
- if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "writeblock server0 error\n");
- goto err;
- }
- if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "writeblock server1 error\n");
- goto err;
- }
- if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "writeblock server2 error\n");
- goto err;
- }
-
-
- free((void *)reqs[0]);
- free((void *)reqs[1]);
- free((void *)reqs[2]);
- return 0;
-
- err:
- if (reqs[0]) {
- dequeue(reqs[0]);
- free((void *)reqs[0]);
- }
- if (reqs[1]) {
- dequeue(reqs[1]);
- free((void *)reqs[1]);
- }
- if (reqs[2]) {
- dequeue(reqs[2]);
- free((void *)reqs[2]);
- }
- return -1;
-}
-
-/*****************************************************************************
- * Allocation *
- *****************************************************************************/
-
-/**
- * allocblock: write a new block to disk
- * @block: pointer to block
- *
- * @return: new id of block on disk
- */
-uint64_t allocblock(void *block) {
- return allocblock_hint(block, 0);
-}
-
-bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) {
- bsq_t *qe;
- int len;
-
- qe = (bsq_t *)malloc(sizeof(bsq_t));
- if (!qe) {
- perror("allocblock_hint qe malloc");
- goto err;
- }
- qe->server = server;
-
- qe->message.operation = BSOP_ALLOCBLOCK;
- qe->message.flags = 0;
- qe->message.id = hint;
- //memcpy(qe->message.block, block, BLOCK_SIZE);
- qe->block = block;
- qe->length = MSGBUFSIZE_BLOCK;
-
- if (send_message(qe) < 0) {
- perror("allocblock_hint sendto");
- goto err;
- }
-
- return qe;
-
- err:
- free((void *)qe);
- return NULL;
-}
-
-/**
- * allocblock_hint: write a new block to disk
- * @block: pointer to block
- * @hint: allocation hint
- *
- * @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
- int map = (int)hint;
- int rep0 = bsclusters[map].servers[0];
- int rep1 = bsclusters[map].servers[1];
- int rep2 = bsclusters[map].servers[2];
- bsq_t *reqs[3];
- int rc;
- uint64_t id0, id1, id2;
-
- reqs[0] = reqs[1] = reqs[2] = NULL;
-
- DB("ENTER allocblock\n");
-
- reqs[0] = allocblock_hint_indiv(rep0, block, hint);
- if (!reqs[0])
- goto err;
- reqs[1] = allocblock_hint_indiv(rep1, block, hint);
- if (!reqs[1])
- goto err;
- reqs[2] = allocblock_hint_indiv(rep2, block, hint);
- if (!reqs[2])
- goto err;
-
- rc = wait_recv(reqs, 3);
- if (rc < 0) {
- perror("allocblock recv");
- goto err;
- }
- if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "allocblock server0 error\n");
- goto err;
- }
- if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "allocblock server1 error\n");
- goto err;
- }
- if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "allocblock server2 error\n");
- goto err;
- }
-
- id0 = reqs[0]->message.id;
- id1 = reqs[1]->message.id;
- id2 = reqs[2]->message.id;
-
-#ifdef BSDEBUG
- fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
- BSID(map, id0, id1, id2),
- (unsigned int)((unsigned char *)block)[0],
- (unsigned int)((unsigned char *)block)[1],
- (unsigned int)((unsigned char *)block)[2],
- (unsigned int)((unsigned char *)block)[3],
- (unsigned int)((unsigned char *)block)[4],
- (unsigned int)((unsigned char *)block)[5],
- (unsigned int)((unsigned char *)block)[6],
- (unsigned int)((unsigned char *)block)[7]);
-#endif
-
- free((void *)reqs[0]);
- free((void *)reqs[1]);
- free((void *)reqs[2]);
- return BSID(map, id0, id1, id2);
-
- err:
- if (reqs[0]) {
- dequeue(reqs[0]);
- free((void *)reqs[0]);
- }
- if (reqs[1]) {
- dequeue(reqs[1]);
- free((void *)reqs[1]);
- }
- if (reqs[2]) {
- dequeue(reqs[2]);
- free((void *)reqs[2]);
- }
- return 0;
-}
-
-#else /* /BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Local storage version *
- *****************************************************************************/
-
-/**
- * readblock: read a block from disk
- * @id: block id to read
- *
- * @return: pointer to block, NULL on error
- */
-
-void *readblock(uint64_t id) {
- void *block;
- int block_fp;
-
-//printf("readblock(%llu)\n", id);
- block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return NULL;
- }
-
- if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- printf ("%Ld ", id);
- printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
- perror("readblock lseek");
- goto err;
- }
- if ((block = malloc(BLOCK_SIZE)) == NULL) {
- perror("readblock malloc");
- goto err;
- }
- if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("readblock read");
- free(block);
- goto err;
- }
- close(block_fp);
- return block;
-
-err:
- close(block_fp);
- return NULL;
-}
-
-/**
- * writeblock: write an existing block to disk
- * @id: block id
- * @block: pointer to block
- *
- * @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-
- int block_fp;
-
- block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return -1;
- }
-
- if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- perror("writeblock lseek");
- goto err;
- }
- if (write(block_fp, block, BLOCK_SIZE) < 0) {
- perror("writeblock write");
- goto err;
- }
- close(block_fp);
- return 0;
-
-err:
- close(block_fp);
- return -1;
-}
-
-/**
- * allocblock: write a new block to disk
- * @block: pointer to block
- *
- * @return: new id of block on disk
- */
-
-uint64_t allocblock(void *block) {
- uint64_t lb;
- off64_t pos;
- int block_fp;
-
- block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return 0;
- }
-
- pos = lseek64(block_fp, 0, SEEK_END);
- if (pos == (off64_t)-1) {
- perror("allocblock lseek");
- goto err;
- }
- if (pos % BLOCK_SIZE != 0) {
- fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
- goto err;
- }
- if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("allocblock write");
- goto err;
- }
- lb = pos / BLOCK_SIZE + 1;
-//printf("alloc(%Ld)\n", lb);
- close(block_fp);
- return lb;
-
-err:
- close(block_fp);
- return 0;
-
-}
-
-/**
- * allocblock_hint: write a new block to disk
- * @block: pointer to block
- * @hint: allocation hint
- *
- * @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
- return allocblock(block);
-}
-
-#endif /* BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Memory management *
- *****************************************************************************/
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- * @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
- void *block = malloc(BLOCK_SIZE);
- if (block == NULL) {
- perror("newblock");
- return NULL;
- }
- memset(block, 0, BLOCK_SIZE);
- return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- * @id: block id (zero if this is only in-memory)
- * @block: block to be freed
- */
-void freeblock(void *block) {
- free(block);
-}
-
-static freeblock_t *new_freeblock(void)
-{
- freeblock_t *fb;
-
- fb = newblock();
-
- if (fb == NULL) return NULL;
-
- fb->magic = FREEBLOCK_MAGIC;
- fb->next = 0ULL;
- fb->count = 0ULL;
- memset(fb->list, 0, sizeof fb->list);
-
- return fb;
-}
-
-void releaseblock(uint64_t id)
-{
- blockstore_super_t *bs_super;
- freeblock_t *fl_current;
-
- /* get superblock */
- bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-
- /* get freeblock_current */
- if (bs_super->freelist_current == 0ULL)
- {
- fl_current = new_freeblock();
- bs_super->freelist_current = allocblock(fl_current);
- writeblock(BLOCKSTORE_SUPER, bs_super);
- } else {
- fl_current = readblock(bs_super->freelist_current);
- }
-
- /* if full, chain to superblock and allocate new current */
-
- if (fl_current->count == FREEBLOCK_SIZE) {
- fl_current->next = bs_super->freelist_full;
- writeblock(bs_super->freelist_current, fl_current);
- bs_super->freelist_full = bs_super->freelist_current;
- freeblock(fl_current);
- fl_current = new_freeblock();
- bs_super->freelist_current = allocblock(fl_current);
- writeblock(BLOCKSTORE_SUPER, bs_super);
- }
-
- /* append id to current */
- fl_current->list[fl_current->count++] = id;
- writeblock(bs_super->freelist_current, fl_current);
-
- freeblock(fl_current);
- freeblock(bs_super);
-
-
-}
-
-/* freelist debug functions: */
-void freelist_count(int print_each)
-{
- blockstore_super_t *bs_super;
- freeblock_t *fb;
- uint64_t total = 0, next;
-
- bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-
- if (bs_super->freelist_current == 0ULL) {
- printf("freelist is empty!\n");
- return;
- }
-
- fb = readblock(bs_super->freelist_current);
- printf("%Ld entires on current.\n", fb->count);
- total += fb->count;
- if (print_each == 1)
- {
- int i;
- for (i=0; i< fb->count; i++)
- printf(" %Ld\n", fb->list[i]);
- }
-
- freeblock(fb);
-
- if (bs_super->freelist_full == 0ULL) {
- printf("freelist_full is empty!\n");
- return;
- }
-
- next = bs_super->freelist_full;
- for (;;) {
- fb = readblock(next);
- total += fb->count;
- if (print_each == 1)
- {
- int i;
- for (i=0; i< fb->count; i++)
- printf(" %Ld\n", fb->list[i]);
- }
- next = fb->next;
- freeblock(fb);
- if (next == 0ULL) break;
- }
- printf("Total of %Ld ids on freelist.\n", total);
-}
-
-/*****************************************************************************
- * Initialisation *
- *****************************************************************************/
-
-int __init_blockstore(void)
-{
- int i;
- blockstore_super_t *bs_super;
- uint64_t ret;
- int block_fp;
-
-#ifdef BLOCKSTORE_REMOTE
- struct hostent *addr;
-
- pthread_mutex_init(&ptmutex_queue, NULL);
- pthread_mutex_init(&ptmutex_luid, NULL);
- pthread_mutex_init(&ptmutex_recv, NULL);
- /*pthread_mutex_init(&ptmutex_notify, NULL);*/
- for (i = 0; i <= READ_POOL_SIZE; i++) {
- pool_thread[i].newdata = 0;
- pthread_mutex_init(&(pool_thread[i].ptmutex), NULL);
- pthread_cond_init(&(pool_thread[i].ptcv), NULL);
- }
-
- bsservers[0].hostname = "firebug.cl.cam.ac.uk";
- bsservers[1].hostname = "planb.cl.cam.ac.uk";
- bsservers[2].hostname = "simcity.cl.cam.ac.uk";
- bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
- bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
- bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
- bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
- bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
- bsservers[8].hostname = NULL;
- bsservers[9].hostname = NULL;
- bsservers[10].hostname = NULL;
- bsservers[11].hostname = NULL;
- bsservers[12].hostname = NULL;
- bsservers[13].hostname = NULL;
- bsservers[14].hostname = NULL;
- bsservers[15].hostname = NULL;
-
- for (i = 0; i < MAX_SERVERS; i++) {
- if (!bsservers[i].hostname)
- continue;
- addr = gethostbyname(bsservers[i].hostname);
- if (!addr) {
- perror("bad hostname");
- return -1;
- }
- bsservers[i].sin.sin_family = addr->h_addrtype;
- bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
- bsservers[i].sin.sin_addr.s_addr =
- ((struct in_addr *)(addr->h_addr))->s_addr;
- }
-
- /* Cluster map
- */
- bsclusters[0].servers[0] = 0;
- bsclusters[0].servers[1] = 1;
- bsclusters[0].servers[2] = 2;
- bsclusters[1].servers[0] = 1;
- bsclusters[1].servers[1] = 2;
- bsclusters[1].servers[2] = 3;
- bsclusters[2].servers[0] = 2;
- bsclusters[2].servers[1] = 3;
- bsclusters[2].servers[2] = 4;
- bsclusters[3].servers[0] = 3;
- bsclusters[3].servers[1] = 4;
- bsclusters[3].servers[2] = 5;
- bsclusters[4].servers[0] = 4;
- bsclusters[4].servers[1] = 5;
- bsclusters[4].servers[2] = 6;
- bsclusters[5].servers[0] = 5;
- bsclusters[5].servers[1] = 6;
- bsclusters[5].servers[2] = 7;
- bsclusters[6].servers[0] = 6;
- bsclusters[6].servers[1] = 7;
- bsclusters[6].servers[2] = 0;
- bsclusters[7].servers[0] = 7;
- bsclusters[7].servers[1] = 0;
- bsclusters[7].servers[2] = 1;
-
- /* Local socket set up
- */
- bssock = socket(AF_INET, SOCK_DGRAM, 0);
- if (bssock < 0) {
- perror("Bad socket");
- return -1;
- }
- memset(&sin_local, 0, sizeof(sin_local));
- sin_local.sin_family = AF_INET;
- sin_local.sin_port = htons(BLOCKSTORED_PORT);
- sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
- if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
- perror("bind");
- close(bssock);
- return -1;
- }
-
- pthread_create(&pthread_recv, NULL, receive_loop, NULL);
- pthread_create(&pthread_recv, NULL, queue_runner, NULL);
-
-#else /* /BLOCKSTORE_REMOTE */
- block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return -1;
- exit(-1);
- }
-
- if (lseek(block_fp, 0, SEEK_END) == 0) {
- bs_super = newblock();
- bs_super->magic = BLOCKSTORE_MAGIC;
- bs_super->freelist_full = 0LL;
- bs_super->freelist_current = 0LL;
-
- ret = allocblock(bs_super);
-
- freeblock(bs_super);
- } else {
- bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
- if (bs_super->magic != BLOCKSTORE_MAGIC)
- {
- printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
- exit(-1);
- }
- freeblock(bs_super);
- }
-
- close(block_fp);
-
-#endif /* BLOCKSTORE_REMOTE */
- return 0;
-}
-
-void __exit_blockstore(void)
-{
- int i;
-#ifdef BLOCKSTORE_REMOTE
- pthread_mutex_destroy(&ptmutex_recv);
- pthread_mutex_destroy(&ptmutex_luid);
- pthread_mutex_destroy(&ptmutex_queue);
- /*pthread_mutex_destroy(&ptmutex_notify);
- pthread_cond_destroy(&ptcv_notify);*/
- for (i = 0; i <= READ_POOL_SIZE; i++) {
- pthread_mutex_destroy(&(pool_thread[i].ptmutex));
- pthread_cond_destroy(&(pool_thread[i].ptcv));
- }
-#endif
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/blockstore.h
--- a/tools/blktap/parallax/blockstore.h Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-/**************************************************************************
- *
- * blockstore.h
- *
- * Simple block store interface
- *
- */
-
-#ifndef __BLOCKSTORE_H__
-#define __BLOCKSTORE_H__
-
-#include <netinet/in.h>
-#include <xenctrl.h>
-
-#define BLOCK_SIZE 4096
-#define BLOCK_SHIFT 12
-#define BLOCK_MASK 0xfffffffffffff000LL
-
-/* XXX SMH: where is the below supposed to be defined???? */
-#ifndef SECTOR_SHIFT
-#define SECTOR_SHIFT 9
-#endif
-
-#define FREEBLOCK_SIZE (BLOCK_SIZE / sizeof(uint64_t)) - (3 * sizeof(uint64_t))
-#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
-
-typedef struct {
- uint64_t magic;
- uint64_t next;
- uint64_t count;
- uint64_t list[FREEBLOCK_SIZE];
-} freeblock_t;
-
-#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
-#define BLOCKSTORE_SUPER 1ULL
-
-typedef struct {
- uint64_t magic;
- uint64_t freelist_full;
- uint64_t freelist_current;
-} blockstore_super_t;
-
-extern void *newblock();
-extern void *readblock(uint64_t id);
-extern uint64_t allocblock(void *block);
-extern uint64_t allocblock_hint(void *block, uint64_t hint);
-extern int writeblock(uint64_t id, void *block);
-
-/* Add this blockid to a freelist, to be recycled by the allocator. */
-extern void releaseblock(uint64_t id);
-
-/* this is a memory free() operation for block-sized allocations */
-extern void freeblock(void *block);
-extern int __init_blockstore(void);
-
-/* debug for freelist. */
-void freelist_count(int print_each);
-#define ALLOCFAIL (((uint64_t)(-1)))
-
-/* Distribution
- */
-#define BLOCKSTORED_PORT 9346
-
-struct bshdr_t_struct {
- uint32_t operation;
- uint32_t flags;
- uint64_t id;
- uint64_t luid;
-} __attribute__ ((packed));
-typedef struct bshdr_t_struct bshdr_t;
-
-struct bsmsg_t_struct {
- bshdr_t hdr;
- unsigned char block[BLOCK_SIZE];
-} __attribute__ ((packed));
-
-typedef struct bsmsg_t_struct bsmsg_t;
-
-#define MSGBUFSIZE_OP sizeof(uint32_t)
-#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t))
-#define MSGBUFSIZE_ID (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint64_t))
-#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
-
-#define BSOP_READBLOCK 0x01
-#define BSOP_WRITEBLOCK 0x02
-#define BSOP_ALLOCBLOCK 0x03
-#define BSOP_FREEBLOCK 0x04
-
-#define BSOP_FLAG_ERROR 0x01
-
-#define BS_ALLOC_SKIP 10
-#define BS_ALLOC_HACK
-
-/* Remote hosts and cluster map - XXX need to generalise
- */
-
-/*
-
- Interim ID format is
-
- 63 60 59 40 39 20 19 0
- +----+--------------------+--------------------+--------------------+
- |map | replica 2 | replica 1 | replica 0 |
- +----+--------------------+--------------------+--------------------+
-
- The map is an index into a table detailing which machines form the
- cluster.
-
- */
-
-#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
-#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
-#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
-#define BSID_MAP(_id) (((_id)>>60)&0xfULL)
-
-#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \
- (((uint64_t)(_rep2))<<40) | \
- (((uint64_t)(_rep1))<<20) | ((uint64_t)(_rep0)))
-
-typedef struct bsserver_t_struct {
- char *hostname;
- struct sockaddr_in sin;
-} bsserver_t;
-
-#define MAX_SERVERS 16
-
-#define CLUSTER_MAX_REPLICAS 3
-typedef struct bscluster_t_struct {
- int servers[CLUSTER_MAX_REPLICAS];
-} bscluster_t;
-
-#define MAX_CLUSTERS 16
-
-#endif /* __BLOCKSTORE_H__ */
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/blockstored.c
--- a/tools/blktap/parallax/blockstored.c Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,275 +0,0 @@
-/**************************************************************************
- *
- * blockstored.c
- *
- * Block store daemon.
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <errno.h>
-#include "blockstore.h"
-
-//#define BSDEBUG
-
-int readblock_into(uint64_t id, void *block);
-
-int open_socket(uint16_t port) {
-
- struct sockaddr_in sn;
- int sock;
-
- sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (sock < 0) {
- perror("Bad socket");
- return -1;
- }
- memset(&sn, 0, sizeof(sn));
- sn.sin_family = AF_INET;
- sn.sin_port = htons(port);
- sn.sin_addr.s_addr = htonl(INADDR_ANY);
- if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
- perror("bind");
- close(sock);
- return -1;
- }
-
- return sock;
-}
-
-static int block_fp = -1;
-static int bssock = -1;
-
-int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
-
- int rc;
-
-#ifdef BSDEBUG
- fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
- len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id);
-#endif
- rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer));
- if (rc < 0) {
- perror("send_reply");
- return 1;
- }
-
-
- return 0;
-}
-
-static bsmsg_t msgbuf;
-
-void service_loop(void) {
-
- for (;;) {
- int rc, len;
- struct sockaddr_in from;
- size_t slen = sizeof(from);
- uint64_t bid;
-
- len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
- (struct sockaddr *)&from, &slen);
-
- if (len < 0) {
- perror("recvfrom");
- continue;
- }
-
- if (len < MSGBUFSIZE_OP) {
- fprintf(stderr, "Short packet.\n");
- continue;
- }
-
-#ifdef BSDEBUG
- fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n",
- len, msgbuf.hdr.operation, msgbuf.hdr.id);
-#endif
-
- switch (msgbuf.hdr.operation) {
- case BSOP_READBLOCK:
- if (len < MSGBUFSIZE_ID) {
- fprintf(stderr, "Short packet (readblock %u).\n", len);
- continue;
- }
- rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
- if (rc < 0) {
- fprintf(stderr, "readblock error\n");
- msgbuf.hdr.flags = BSOP_FLAG_ERROR;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- continue;
- }
- msgbuf.hdr.flags = 0;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
- break;
- case BSOP_WRITEBLOCK:
- if (len < MSGBUFSIZE_BLOCK) {
- fprintf(stderr, "Short packet (writeblock %u).\n", len);
- continue;
- }
- rc = writeblock(msgbuf.hdr.id, msgbuf.block);
- if (rc < 0) {
- fprintf(stderr, "writeblock error\n");
- msgbuf.hdr.flags = BSOP_FLAG_ERROR;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- continue;
- }
- msgbuf.hdr.flags = 0;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- break;
- case BSOP_ALLOCBLOCK:
- if (len < MSGBUFSIZE_BLOCK) {
- fprintf(stderr, "Short packet (allocblock %u).\n", len);
- continue;
- }
- bid = allocblock(msgbuf.block);
- if (bid == ALLOCFAIL) {
- fprintf(stderr, "allocblock error\n");
- msgbuf.hdr.flags = BSOP_FLAG_ERROR;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- continue;
- }
- msgbuf.hdr.id = bid;
- msgbuf.hdr.flags = 0;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- break;
- }
-
- }
-}
-
-/**
- * readblock: read a block from disk
- * @id: block id to read
- * @block: pointer to buffer to receive block
- *
- * @return: 0 if OK, other on error
- */
-
-int readblock_into(uint64_t id, void *block) {
- if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
- perror("readblock lseek");
- return -1;
- }
- if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("readblock read");
- return -1;
- }
- return 0;
-}
-
-/**
- * writeblock: write an existing block to disk
- * @id: block id
- * @block: pointer to block
- *
- * @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
- if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- perror("writeblock lseek");
- return -1;
- }
- if (write(block_fp, block, BLOCK_SIZE) < 0) {
- perror("writeblock write");
- return -1;
- }
- return 0;
-}
-
-/**
- * allocblock: write a new block to disk
- * @block: pointer to block
- *
- * @return: new id of block on disk
- */
-static uint64_t lastblock = 0;
-
-uint64_t allocblock(void *block) {
- uint64_t lb;
- off64_t pos;
-
- retry:
- pos = lseek64(block_fp, 0, SEEK_END);
- if (pos == (off64_t)-1) {
- perror("allocblock lseek");
- return ALLOCFAIL;
- }
- if (pos % BLOCK_SIZE != 0) {
- fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
- return ALLOCFAIL;
- }
- if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("allocblock write");
- return ALLOCFAIL;
- }
- lb = pos / BLOCK_SIZE + 1;
-
-#ifdef BS_ALLOC_HACK
- if (lb < BS_ALLOC_SKIP)
- goto retry;
-#endif
-
- if (lb <= lastblock)
- printf("[*** %Ld alredy allocated! ***]\n", lb);
-
- lastblock = lb;
- return lb;
-}
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- * @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
- void *block = malloc(BLOCK_SIZE);
- if (block == NULL) {
- perror("newblock");
- return NULL;
- }
- memset(block, 0, BLOCK_SIZE);
- return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- * @id: block id (zero if this is only in-memory)
- * @block: block to be freed
- */
-void freeblock(void *block) {
- free(block);
-}
-
-
-int main(int argc, char **argv)
-{
- block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return -1;
- }
-
- bssock = open_socket(BLOCKSTORED_PORT);
- if (bssock < 0) {
- return -1;
- }
-
- service_loop();
-
- close(bssock);
-
- return 0;
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/bstest.c
--- a/tools/blktap/parallax/bstest.c Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-/**************************************************************************
- *
- * bstest.c
- *
- * Block store daemon test program.
- *
- * usage: bstest <host>|X {r|w|a} ID
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <errno.h>
-#include "blockstore.h"
-
-int direct(char *host, uint32_t op, uint64_t id, int len) {
- struct sockaddr_in sn, peer;
- int sock;
- bsmsg_t msgbuf;
- int rc, slen;
- struct hostent *addr;
-
- addr = gethostbyname(host);
- if (!addr) {
- perror("bad hostname");
- exit(1);
- }
- peer.sin_family = addr->h_addrtype;
- peer.sin_port = htons(BLOCKSTORED_PORT);
- peer.sin_addr.s_addr = ((struct in_addr *)(addr->h_addr))->s_addr;
- fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
- (unsigned int)(unsigned char)addr->h_addr[0],
- (unsigned int)(unsigned char)addr->h_addr[1],
- (unsigned int)(unsigned char)addr->h_addr[2],
- (unsigned int)(unsigned char)addr->h_addr[3]);
-
- sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (sock < 0) {
- perror("Bad socket");
- exit(1);
- }
- memset(&sn, 0, sizeof(sn));
- sn.sin_family = AF_INET;
- sn.sin_port = htons(BLOCKSTORED_PORT);
- sn.sin_addr.s_addr = htonl(INADDR_ANY);
- if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
- perror("bind");
- close(sock);
- exit(1);
- }
-
- memset((void *)&msgbuf, 0, sizeof(msgbuf));
- msgbuf.operation = op;
- msgbuf.id = id;
-
- rc = sendto(sock, (void *)&msgbuf, len, 0,
- (struct sockaddr *)&peer, sizeof(peer));
- if (rc < 0) {
- perror("sendto");
- exit(1);
- }
-
- slen = sizeof(peer);
- len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
- (struct sockaddr *)&peer, &slen);
- if (len < 0) {
- perror("recvfrom");
- exit(1);
- }
-
- printf("Reply %u bytes:\n", len);
- if (len >= MSGBUFSIZE_OP)
- printf(" operation: %u\n", msgbuf.operation);
- if (len >= MSGBUFSIZE_FLAGS)
- printf(" flags: 0x%x\n", msgbuf.flags);
- if (len >= MSGBUFSIZE_ID)
- printf(" id: %llu\n", msgbuf.id);
- if (len >= (MSGBUFSIZE_ID + 4))
- printf(" data: %02x %02x %02x %02x...\n",
- (unsigned int)msgbuf.block[0],
- (unsigned int)msgbuf.block[1],
- (unsigned int)msgbuf.block[2],
- (unsigned int)msgbuf.block[3]);
-
- if (sock > 0)
- close(sock);
-
- return 0;
-}
-
-int main (int argc, char **argv) {
-
- uint32_t op = 0;
- uint64_t id = 0;
- int len = 0, rc;
- void *block;
-
- if (argc < 3) {
- fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
- return 1;
- }
-
- switch (argv[2][0]) {
- case 'r':
- case 'R':
- op = BSOP_READBLOCK;
- len = MSGBUFSIZE_ID;
- break;
- case 'w':
- case 'W':
- op = BSOP_WRITEBLOCK;
- len = MSGBUFSIZE_BLOCK;
- break;
- case 'a':
- case 'A':
- op = BSOP_ALLOCBLOCK;
- len = MSGBUFSIZE_BLOCK;
- break;
- default:
- fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
- return 1;
- }
-
- if (argc >= 4)
- id = atoll(argv[3]);
-
- if (strcmp(argv[1], "X") == 0) {
- rc = __init_blockstore();
- if (rc < 0) {
- fprintf(stderr, "blockstore init failed.\n");
- return 1;
- }
- switch(op) {
- case BSOP_READBLOCK:
- block = readblock(id);
- if (block) {
- printf("data: %02x %02x %02x %02x...\n",
- (unsigned int)((unsigned char*)block)[0],
- (unsigned int)((unsigned char*)block)[1],
- (unsigned int)((unsigned char*)block)[2],
- (unsigned int)((unsigned char*)block)[3]);
- }
- break;
- case BSOP_WRITEBLOCK:
- block = malloc(BLOCK_SIZE);
- if (!block) {
- perror("bstest malloc");
- return 1;
- }
- memset(block, 0, BLOCK_SIZE);
- rc = writeblock(id, block);
- if (rc != 0) {
- printf("error\n");
- }
- else {
- printf("OK\n");
- }
- break;
- case BSOP_ALLOCBLOCK:
- block = malloc(BLOCK_SIZE);
- if (!block) {
- perror("bstest malloc");
- return 1;
- }
- memset(block, 0, BLOCK_SIZE);
- id = allocblock_hint(block, id);
- if (id == 0) {
- printf("error\n");
- }
- else {
- printf("ID: %llu\n", id);
- }
- break;
- }
- }
- else {
- direct(argv[1], op, id, len);
- }
-
-
- return 0;
-}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/parallax.c
--- a/tools/blktap/parallax/parallax.c Fri Jun 23 15:26:01 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,608 +0,0 @@
-/**************************************************************************
- *
- * parallax.c
- *
- * The Parallax Storage Server
- *
- */
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "blktaplib.h"
-#include "blockstore.h"
-#include "vdi.h"
-#include "block-async.h"
-#include "requests-async.h"
-
-/* Numeric constants for the parallax server.
- * NOTE(review): neither is referenced in the part of the file visible
- * here -- confirm where they are consumed. */
-#define PARALLAX_DEV   61440   /* == 0xF000 */
-#define SECTS_PER_NODE 8
-
-
-/* Debug tracing.  Change the "#if 0" below to "#if 1" to send DPRINTF()
- * output to printf(); in the disabled form it expands to a no-op
- * expression and its arguments are not evaluated. */
-#if 0
-#define DPRINTF(fmt, args...) printf ( fmt , ## args )
-#else
-#define DPRINTF(fmt, args...) ((void)0)
-#endif
-
-/* ------[ session records ]----------------------------------------------- */
-
-/* Table of blkif records keyed by (domid, handle).  The hash masks with
- * (size - 1), so the bucket count has to stay a power of two. */
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_dom, _hnd) \
-    (((int)(_dom) ^ (int)(_hnd)) & (BLKIF_HASHSZ - 1))
-
-/* Per-blkif table of attached VDIs keyed by virtual device number; the
- * same power-of-two masking applies. */
-#define VDI_HASHSZ 16
-#define VDI_HASH(_dev) ((((_dev) >> 8) ^ (_dev)) & (VDI_HASHSZ - 1))
-
-/* One record per (domid, handle) block interface. */
-struct blkif {
-    domid_t       domid;                 /* owning domain (lookup key)      */
-    unsigned int  handle;                /* interface handle (lookup key)   */
-    enum {
-        DISCONNECTED,
-        DISCONNECTING,
-        CONNECTED
-    }             status;                /* connection state                */
-    vdi_t        *vdi_hash[VDI_HASHSZ];  /* VDI chains, bucket = VDI_HASH() */
-    struct blkif *hash_next;             /* next record in the same bucket  */
-};
-typedef struct blkif blkif_t;
-
-/* Bucket heads; static storage, so every chain starts out empty (NULL). */
-static blkif_t *blkif_hash[BLKIF_HASHSZ];
-
-/*
- * Look up the blkif record for (domid, handle).
- *
- * Scans the blkif_hash chain selected by BLKIF_HASH(domid, handle) and
- * returns the first record whose domid and handle both match, or NULL when
- * the chain contains none.  A non-zero handle is still looked up, but a
- * warning is printed to stdout first.
- */
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    blkif_t *entry;
-
-    if ( handle != 0 )
-        printf("blktap/parallax don't currently support non-0 dev handles!\n");
-
-    for ( entry = blkif_hash[BLKIF_HASH(domid, handle)];
-          entry != NULL;
-          entry = entry->hash_next )
-    {
-        if ( (entry->domid == domid) && (entry->handle == handle) )
-            return entry;
-    }
-
-    return NULL;
-}
-
-/*
- * Find the VDI attached to blkif under virtual device number `device`.
- *
- * Scans the blkif->vdi_hash chain selected by VDI_HASH(device) and returns
- * the first VDI whose vdevice equals `device`, or NULL if none is chained
- * there.  blkif itself must not be NULL; it is dereferenced unchecked.
- */
-vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
-{
-    vdi_t *cur;
-
-    for ( cur = blkif->vdi_hash[VDI_HASH(device)];
-          cur != NULL;
-          cur = cur->next )
-    {
-        if ( cur->vdevice == device )
-            return cur;
-    }
-
-    return NULL;
-}
-
-/* ------[ control message handling ]-------------------------------------- */
-
-/*
- * Handle a "create interface" control message.
- *
- * Allocates a zeroed blkif record for (create->domid, create->blkif_handle),
- * marks it DISCONNECTED and appends it to the tail of its blkif_hash chain.
- * The outcome is written to create->status:
- *   BLKIF_BE_STATUS_OUT_OF_MEMORY     - the record could not be allocated
- *   BLKIF_BE_STATUS_INTERFACE_EXISTS  - a record with that key is already
- *                                       chained; the new one is discarded
- *   BLKIF_BE_STATUS_OKAY              - the record was inserted
- */
-void blkif_create(blkif_be_create_t *create)
-{
-    domid_t       domid  = create->domid;
-    unsigned int  handle = create->blkif_handle;
-    blkif_t      *fresh;
-    blkif_t     **link;
-
-    DPRINTF("parallax (blkif_create): create is %p\n", create);
-
-    fresh = (blkif_t *)malloc(sizeof(blkif_t));
-    if ( fresh == NULL )
-    {
-        DPRINTF("Could not create blkif: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    memset(fresh, 0, sizeof(*fresh));
-    fresh->domid  = domid;
-    fresh->handle = handle;
-    fresh->status = DISCONNECTED;
-
-    /* Walk to the end of the bucket chain, rejecting a duplicate key on the
-     * way; on exit `link` addresses the NULL slot that terminates it. */
-    for ( link = &blkif_hash[BLKIF_HASH(domid, handle)];
-          *link != NULL;
-          link = &(*link)->hash_next )
-    {
-        if ( ((*link)->domid == domid) && ((*link)->handle == handle) )
-        {
-            DPRINTF("Could not create blkif: already exists (%d,%d)\n",
-                    domid, handle);
-            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
-            free(fresh);
-            return;
-        }
-    }
-
-    fresh->hash_next = *link;
-    *link = fresh;
-
-    DPRINTF("Successfully created blkif\n");
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-/*
- * Handle a "destroy interface" control message.
- *
- * Searches the blkif_hash chain for (destroy->domid, destroy->blkif_handle)
- * and, if the record is DISCONNECTED, unlinks and frees it.  The outcome is
- * written to destroy->status:
- *   BLKIF_BE_STATUS_INTERFACE_NOT_FOUND  - no record with that key
- *   BLKIF_BE_STATUS_INTERFACE_CONNECTED  - record found but its status is
- *                                          not DISCONNECTED; left in place
- *   BLKIF_BE_STATUS_OKAY                 - record removed and freed
- */
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->blkif_handle;
-    blkif_t     **link;
-    blkif_t      *victim;
-
-    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy);
-
-    /* `link` always addresses the pointer that refers to `victim`, so the
-     * record can be unlinked without tracking a predecessor. */
-    for ( link = &blkif_hash[BLKIF_HASH(domid, handle)];
-          (victim = *link) != NULL;
-          link = &victim->hash_next )
-    {
-        if ( (victim->domid != domid) || (victim->handle != handle) )
-            continue;
-
-        if ( victim->status != DISCONNECTED )
-        {
-            destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-            return;
-        }
-
-        *link = victim->hash_next;
-        free(victim);
-        destroy->status = BLKIF_BE_STATUS_OKAY;
-        return;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-}
-
-/*
- * Handle a "create VBD" control message: attach a VDI to an existing blkif.
- *
- * Looks up the blkif for (create->domid, create->blkif_handle), fetches the
- * VDI with vdi_get(create->dev_handle), tags it with create->vdevice and
- * appends it to the tail of the matching blkif->vdi_hash chain.  The outcome
- * is written to create->status:
- *   BLKIF_BE_STATUS_INTERFACE_NOT_FOUND  - no such blkif
- *   BLKIF_BE_STATUS_VBD_NOT_FOUND        - vdi_get() returned NULL
- *   BLKIF_BE_STATUS_OKAY                 - VDI attached
- *
- * No check is made for an entry that already uses the same vdevice; a
- * second attach simply chains another VDI behind the first.
- */
-void vbd_create(blkif_be_vbd_create_t *create)
-{
-    blkif_t *blkif;
-    vdi_t *vdi, **vdip;
-    blkif_vdev_t vdevice = create->vdevice;
-
-    DPRINTF("parallax (vbd_create): create=%p\n", create);
-
-    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
-    if ( blkif == NULL )
-    {
-        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n",
-                create->domid, create->blkif_handle);
-        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    /* The VDI identifier is carried in create->dev_handle. */
-    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n",
-            (unsigned long)create->dev_handle);
-
-    vdi = vdi_get(create->dev_handle);
-    if (vdi == NULL)
-    {
-        printf("parallax (vbd_create): VDI %lx not found.\n",
-               (unsigned long)create->dev_handle);
-        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
-        return;
-    }
-
-    /* Tag the VDI with its virtual device number and append it to the end
-     * of the bucket chain chosen by VDI_HASH(vdevice). */
-    vdi->next = NULL;
-    vdi->vdevice = vdevice;
-    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
-    while (*vdip != NULL)
-        vdip = &(*vdip)->next;
-    *vdip = vdi;
-
-    /* This trace used to say "blkif_create succeeded", which pointed at the
-     * wrong handler. */
-    DPRINTF("vbd_create succeeded\n");
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
-{
- blkif_t *blkif;
- vdi_t *vdi, **vdip;
- blkif_vdev_t vdevice = destroy->vdevice;
-
- blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
- if ( blkif == NULL )
- {
- DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n",
- destroy->domid, destroy->blkif_handle);
- destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|