# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 1bab7d65171b762bb3cf1ae426bc6c403f847ebf
# Parent 4ba0982264290acfa208304b4e3343ec8c3ec903
# Parent 3e6325b73474b3764573178152503af27a914ab8
merge with xen-unstable.hg
---
xen/arch/powerpc/htab.c | 68 --
.hgignore | 2
extras/mini-os/Makefile | 3
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 7
linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c | 42 -
linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c | 56 +
linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c | 2
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 11
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 10
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 4
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 8
linux-2.6-xen-sparse/drivers/xen/blktap/common.h | 1
linux-2.6-xen-sparse/drivers/xen/blktap/interface.c | 23
linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 16
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 10
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 67 +-
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c | 12
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 21
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 34 -
linux-2.6-xen-sparse/include/xen/xenbus.h | 6
tools/blktap/drivers/block-aio.c | 19
tools/blktap/drivers/block-qcow.c | 19
tools/blktap/drivers/tapdisk.c | 1
tools/blktap/lib/xs_api.c | 23
tools/libxc/ia64/xc_ia64_stubs.c | 16
tools/libxc/powerpc64/xc_linux_build.c | 4
tools/libxc/xenctrl.h | 5
tools/python/xen/xend/FlatDeviceTree.py | 323
++++++++++
tools/python/xen/xend/XendCheckpoint.py | 6
tools/python/xen/xend/XendDomain.py | 3
tools/python/xen/xend/XendDomainInfo.py | 63 +
tools/python/xen/xend/arch.py | 32
tools/python/xen/xend/image.py | 143 ++--
tools/python/xen/xend/server/DevController.py | 6
tools/python/xen/xend/server/XMLRPCServer.py | 3
tools/python/xen/xend/server/blkif.py | 16
tools/python/xen/xm/migrate.py | 3
tools/python/xen/xm/shutdown.py | 49 +
xen/arch/ia64/xen/dom0_ops.c | 4
xen/arch/powerpc/Makefile | 31
xen/arch/powerpc/Rules.mk | 2
xen/arch/powerpc/backtrace.c | 193 +++++
xen/arch/powerpc/boot_of.c | 208 ++++--
xen/arch/powerpc/dart_u3.c | 8
xen/arch/powerpc/dom0_ops.c | 20
xen/arch/powerpc/domain.c | 58 -
xen/arch/powerpc/domain_build.c | 60 +
xen/arch/powerpc/exceptions.c | 2
xen/arch/powerpc/exceptions.h | 3
xen/arch/powerpc/external.c | 3
xen/arch/powerpc/iommu.c | 17
xen/arch/powerpc/memory.c | 206 ++++++
xen/arch/powerpc/mm.c | 298
++++++++-
xen/arch/powerpc/mpic.c | 6
xen/arch/powerpc/ofd_fixup.c | 101 ---
xen/arch/powerpc/ofd_fixup_memory.c | 107 +++
xen/arch/powerpc/oftree.h | 8
xen/arch/powerpc/papr/tce.c | 6
xen/arch/powerpc/papr/xlate.c | 46 +
xen/arch/powerpc/powerpc64/exceptions.S | 37 +
xen/arch/powerpc/powerpc64/ppc970.c | 112 ++-
xen/arch/powerpc/setup.c | 207 +++---
xen/arch/powerpc/shadow.c | 159 ++++
xen/arch/powerpc/xen.lds.S | 10
xen/arch/x86/hvm/io.c | 10
xen/arch/x86/hvm/platform.c | 32
xen/arch/x86/hvm/svm/intr.c | 43 -
xen/arch/x86/hvm/svm/svm.c | 5
xen/arch/x86/hvm/vlapic.c | 14
xen/arch/x86/hvm/vmx/io.c | 13
xen/arch/x86/hvm/vmx/vmx.c | 29
xen/arch/x86/mm/shadow/multi.c | 66 --
xen/arch/x86/physdev.c | 5
xen/arch/x86/time.c | 4
xen/arch/x86/traps.c | 5
xen/arch/x86/x86_32/traps.c | 46 +
xen/arch/x86/x86_64/traps.c | 43 -
xen/common/perfc.c | 4
xen/include/asm-ia64/mm.h | 1
xen/include/asm-powerpc/config.h | 4
xen/include/asm-powerpc/current.h | 4
xen/include/asm-powerpc/domain.h | 7
xen/include/asm-powerpc/htab.h | 4
xen/include/asm-powerpc/mm.h | 183 +++--
xen/include/asm-powerpc/powerpc64/procarea.h | 1
xen/include/asm-powerpc/processor.h | 7
xen/include/asm-powerpc/shadow.h | 16
xen/include/asm-powerpc/smp.h | 4
xen/include/asm-powerpc/types.h | 19
xen/include/asm-x86/mm.h | 1
xen/include/asm-x86/page.h | 7
xen/include/asm-x86/processor.h | 7
xen/include/asm-x86/x86_32/page-2level.h | 3
xen/include/asm-x86/x86_32/page-3level.h | 2
xen/include/asm-x86/x86_32/page.h | 9
xen/include/asm-x86/x86_64/page.h | 11
xen/include/public/arch-ia64.h | 3
xen/include/public/arch-powerpc.h | 3
xen/include/public/arch-x86_32.h | 17
xen/include/public/arch-x86_64.h | 3
xen/include/public/domctl.h | 21
xen/include/public/sysctl.h | 16
xen/include/public/xen.h | 1
103 files changed, 2690 insertions(+), 1032 deletions(-)
diff -r 4ba098226429 -r 1bab7d65171b .hgignore
--- a/.hgignore Fri Sep 01 12:52:12 2006 -0600
+++ b/.hgignore Fri Sep 01 13:04:02 2006 -0600
@@ -203,6 +203,8 @@
^xen/arch/powerpc/firmware$
^xen/arch/powerpc/firmware_image$
^xen/arch/powerpc/xen\.lds$
+^xen/arch/powerpc/.xen-syms$
+^xen/arch/powerpc/xen-syms.S$
^unmodified_drivers/linux-2.6/\.tmp_versions
^unmodified_drivers/linux-2.6/.*\.cmd$
^unmodified_drivers/linux-2.6/.*\.ko$
diff -r 4ba098226429 -r 1bab7d65171b extras/mini-os/Makefile
--- a/extras/mini-os/Makefile Fri Sep 01 12:52:12 2006 -0600
+++ b/extras/mini-os/Makefile Fri Sep 01 13:04:02 2006 -0600
@@ -7,9 +7,12 @@ include $(XEN_ROOT)/Config.mk
# Set TARGET_ARCH
override TARGET_ARCH := $(XEN_TARGET_ARCH)
+XEN_INTERFACE_VERSION := 0x00030203
+
# NB. '-Wcast-qual' is nasty, so I omitted it.
CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format
CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline
+CFLAGS += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION)
ASFLAGS = -D__ASSEMBLY__
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Sep 01 12:52:12
2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Sep 01 13:04:02
2006 -0600
@@ -1380,8 +1380,10 @@ legacy_init_iomem_resources(struct e820e
* so we try it repeatedly and let the resource manager
* test it.
*/
+#ifndef CONFIG_XEN
request_resource(res, code_resource);
request_resource(res, data_resource);
+#endif
#ifdef CONFIG_KEXEC
request_resource(res, &crashk_res);
#endif
@@ -1454,11 +1456,8 @@ static void __init register_memory(void)
int i;
/* Nothing to do if not running in dom0. */
- if (!is_initial_xendomain()) {
- legacy_init_iomem_resources(e820.map, e820.nr_map,
- &code_resource, &data_resource);
+ if (!is_initial_xendomain())
return;
- }
#ifdef CONFIG_XEN
machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Fri Sep 01 12:52:12
2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Fri Sep 01 13:04:02
2006 -0600
@@ -22,15 +22,6 @@
#define ISA_START_ADDRESS 0x0
#define ISA_END_ADDRESS 0x100000
-#if 0 /* not PAE safe */
-/* These hacky macros avoid phys->machine translations. */
-#define __direct_pte(x) ((pte_t) { (x) } )
-#define __direct_mk_pte(page_nr,pgprot) \
- __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
-#define direct_mk_pte_phys(physpage, pgprot) \
- __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-#endif
-
static int direct_remap_area_pte_fn(pte_t *pte,
struct page *pmd_page,
unsigned long address,
@@ -66,17 +57,16 @@ static int __direct_remap_pfn_range(stru
for (i = 0; i < size; i += PAGE_SIZE) {
if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) {
- /* Fill in the PTE pointers. */
+ /* Flush a full batch after filling in the PTE ptrs. */
rc = apply_to_page_range(mm, start_address,
address - start_address,
direct_remap_area_pte_fn, &w);
if (rc)
goto out;
- w = u;
rc = -EFAULT;
if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
goto out;
- v = u;
+ v = w = u;
start_address = address;
}
@@ -92,7 +82,7 @@ static int __direct_remap_pfn_range(stru
}
if (v != u) {
- /* get the ptep's filled in */
+ /* Final batch. */
rc = apply_to_page_range(mm, start_address,
address - start_address,
direct_remap_area_pte_fn, &w);
@@ -178,32 +168,6 @@ int touch_pte_range(struct mm_struct *mm
}
EXPORT_SYMBOL(touch_pte_range);
-
-void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot)
-{
- int error;
-
- struct vm_struct *vma;
- vma = get_vm_area (vm_size, VM_IOREMAP);
-
- if (vma == NULL) {
- printk ("ioremap.c,vm_map_xen_pages(): "
- "Failed to get VMA area\n");
- return NULL;
- }
-
- error = direct_kernel_remap_pfn_range((unsigned long) vma->addr,
- maddr >> PAGE_SHIFT, vm_size,
- prot, DOMID_SELF );
- if (error == 0) {
- return vma->addr;
- } else {
- printk ("ioremap.c,vm_map_xen_pages(): "
- "Failed to map xen shared pages into kernel space\n");
- return NULL;
- }
-}
-EXPORT_SYMBOL(vm_map_xen_pages);
/*
* Does @address reside within a non-highmem page that is local to this virtual
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c
--- a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Fri Sep 01
12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Fri Sep 01
13:04:02 2006 -0600
@@ -26,6 +26,7 @@
#include <xen/evtchn.h>
#include "op_counter.h"
+#include <xen/driver_util.h>
#include <xen/interface/xen.h>
#include <xen/interface/xenoprof.h>
#include <../../../drivers/oprofile/cpu_buffer.h>
@@ -33,8 +34,6 @@
static int xenoprof_start(void);
static void xenoprof_stop(void);
-
-void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot);
static int xenoprof_enabled = 0;
static unsigned int num_events = 0;
@@ -373,9 +372,9 @@ static int xenoprof_set_passive(int * p_
{
int ret;
int i, j;
- int vm_size;
int npages;
struct xenoprof_buf *buf;
+ struct vm_struct *area;
pgprot_t prot = __pgprot(_KERNPG_TABLE);
if (!is_primary)
@@ -391,19 +390,29 @@ static int xenoprof_set_passive(int * p_
for (i = 0; i < pdoms; i++) {
passive_domains[i].domain_id = p_domains[i];
passive_domains[i].max_samples = 2048;
- ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive,
&passive_domains[i]);
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive,
+ &passive_domains[i]);
if (ret)
- return ret;
+ goto out;
npages = (passive_domains[i].bufsize * passive_domains[i].nbuf
- 1) / PAGE_SIZE + 1;
- vm_size = npages * PAGE_SIZE;
-
- p_shared_buffer[i] = (char
*)vm_map_xen_pages(passive_domains[i].buf_maddr,
- vm_size, prot);
- if (!p_shared_buffer[i]) {
+
+ area = alloc_vm_area(npages * PAGE_SIZE);
+ if (area == NULL) {
ret = -ENOMEM;
goto out;
}
+
+ ret = direct_kernel_remap_pfn_range(
+ (unsigned long)area->addr,
+ passive_domains[i].buf_maddr >> PAGE_SHIFT,
+ npages * PAGE_SIZE, prot, DOMID_SELF);
+ if (ret) {
+ vunmap(area->addr);
+ goto out;
+ }
+
+ p_shared_buffer[i] = area->addr;
for (j = 0; j < passive_domains[i].nbuf; j++) {
buf = (struct xenoprof_buf *)
@@ -473,11 +482,9 @@ int __init oprofile_arch_init(struct opr
int __init oprofile_arch_init(struct oprofile_operations * ops)
{
struct xenoprof_init init;
- struct xenoprof_buf * buf;
- int vm_size;
- int npages;
- int ret;
- int i;
+ struct xenoprof_buf *buf;
+ int npages, ret, i;
+ struct vm_struct *area;
init.max_samples = 16;
ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
@@ -495,14 +502,23 @@ int __init oprofile_arch_init(struct opr
num_events = OP_MAX_COUNTER;
npages = (init.bufsize * nbuf - 1) / PAGE_SIZE + 1;
- vm_size = npages * PAGE_SIZE;
-
- shared_buffer = (char *)vm_map_xen_pages(init.buf_maddr,
- vm_size, prot);
- if (!shared_buffer) {
+
+ area = alloc_vm_area(npages * PAGE_SIZE);
+ if (area == NULL) {
ret = -ENOMEM;
goto out;
}
+
+ ret = direct_kernel_remap_pfn_range(
+ (unsigned long)area->addr,
+ init.buf_maddr >> PAGE_SHIFT,
+ npages * PAGE_SIZE, prot, DOMID_SELF);
+ if (ret) {
+ vunmap(area->addr);
+ goto out;
+ }
+
+ shared_buffer = area->addr;
for (i=0; i< nbuf; i++) {
buf = (struct xenoprof_buf*)
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Fri Sep 01
12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Fri Sep 01
13:04:02 2006 -0600
@@ -255,8 +255,10 @@ void __init e820_reserve_resources(struc
* so we try it repeatedly and let the resource manager
* test it.
*/
+#ifndef CONFIG_XEN
request_resource(res, &code_resource);
request_resource(res, &data_resource);
+#endif
#ifdef CONFIG_KEXEC
request_resource(res, &crashk_res);
#endif
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Fri Sep 01
12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Fri Sep 01
13:04:02 2006 -0600
@@ -846,7 +846,7 @@ void __init setup_arch(char **cmdline_p)
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* Make sure we have a large enough P->M table. */
- phys_to_machine_mapping = alloc_bootmem(
+ phys_to_machine_mapping = alloc_bootmem_pages(
end_pfn * sizeof(unsigned long));
memset(phys_to_machine_mapping, ~0,
end_pfn * sizeof(unsigned long));
@@ -863,7 +863,7 @@ void __init setup_arch(char **cmdline_p)
* list of frames that make up the p2m table. Used by
* save/restore.
*/
- pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE);
+ pfn_to_mfn_frame_list_list =
alloc_bootmem_pages(PAGE_SIZE);
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
=
virt_to_mfn(pfn_to_mfn_frame_list_list);
@@ -873,7 +873,7 @@ void __init setup_arch(char **cmdline_p)
k++;
BUG_ON(k>=fpp);
pfn_to_mfn_frame_list[k] =
- alloc_bootmem(PAGE_SIZE);
+ alloc_bootmem_pages(PAGE_SIZE);
pfn_to_mfn_frame_list_list[k] =
virt_to_mfn(pfn_to_mfn_frame_list[k]);
j=0;
@@ -944,9 +944,10 @@ void __init setup_arch(char **cmdline_p)
BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map,
&memmap));
e820_reserve_resources(machine_e820, memmap.nr_entries);
- } else
-#endif
+ }
+#else
e820_reserve_resources(e820.map, e820.nr_map);
+#endif
request_resource(&iomem_resource, &video_ram_resource);
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri Sep 01 12:52:12
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri Sep 01 13:04:02
2006 -0600
@@ -301,11 +301,11 @@ static void frontend_changed(struct xenb
struct backend_info *be = dev->dev.driver_data;
int err;
- DPRINTK("");
+ DPRINTK("%s", xenbus_strstate(frontend_state));
switch (frontend_state) {
case XenbusStateInitialising:
- if (dev->state == XenbusStateClosing) {
+ if (dev->state == XenbusStateClosed) {
printk("%s: %s: prepare for reconnect\n",
__FUNCTION__, dev->nodename);
xenbus_switch_state(dev, XenbusStateInitWait);
@@ -331,8 +331,12 @@ static void frontend_changed(struct xenb
xenbus_switch_state(dev, XenbusStateClosing);
break;
+ case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
case XenbusStateUnknown:
- case XenbusStateClosed:
device_unregister(&dev->dev);
break;
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Sep 01
12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Sep 01
13:04:02 2006 -0600
@@ -273,7 +273,7 @@ static void backend_changed(struct xenbu
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
down(&bd->bd_sem);
- if (info->users > 0)
+ if (info->users > 0 && system_state == SYSTEM_RUNNING)
xenbus_dev_error(dev, -EBUSY,
"Device in use; refusing to close");
else
@@ -360,7 +360,7 @@ static void blkfront_closing(struct xenb
xlvbd_del(info);
- xenbus_switch_state(dev, XenbusStateClosed);
+ xenbus_frontend_closed(dev);
}
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri Sep 01 12:52:12
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri Sep 01 13:04:02
2006 -0600
@@ -114,8 +114,8 @@ typedef struct domid_translate {
} domid_translate_t ;
-domid_translate_t translate_domid[MAX_TAP_DEV];
-tap_blkif_t *tapfds[MAX_TAP_DEV];
+static domid_translate_t translate_domid[MAX_TAP_DEV];
+static tap_blkif_t *tapfds[MAX_TAP_DEV];
static int __init set_blkif_reqs(char *str)
{
@@ -1118,7 +1118,7 @@ static int do_block_io_op(blkif_t *blkif
"ring does not exist!\n");
print_dbug = 0; /*We only print this message once*/
}
- return 1;
+ return 0;
}
info = tapfds[blkif->dev_num];
@@ -1127,7 +1127,7 @@ static int do_block_io_op(blkif_t *blkif
WPRINTK("Can't get UE info!\n");
print_dbug = 0;
}
- return 1;
+ return 0;
}
while (rc != rp) {
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/blktap/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Fri Sep 01 12:52:12
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Fri Sep 01 13:04:02
2006 -0600
@@ -91,6 +91,7 @@ void tap_blkif_free(blkif_t *blkif);
void tap_blkif_free(blkif_t *blkif);
int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
unsigned int evtchn);
+void tap_blkif_unmap(blkif_t *blkif);
#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define blkif_put(_b) \
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/blktap/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Fri Sep 01
12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Fri Sep 01
13:04:02 2006 -0600
@@ -135,20 +135,25 @@ int tap_blkif_map(blkif_t *blkif, unsign
return 0;
}
+void tap_blkif_unmap(blkif_t *blkif)
+{
+ if (blkif->irq) {
+ unbind_from_irqhandler(blkif->irq, blkif);
+ blkif->irq = 0;
+ }
+ if (blkif->blk_ring.sring) {
+ unmap_frontend_page(blkif);
+ free_vm_area(blkif->blk_ring_area);
+ blkif->blk_ring.sring = NULL;
+ }
+}
+
void tap_blkif_free(blkif_t *blkif)
{
atomic_dec(&blkif->refcnt);
wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
- /* Already disconnected? */
- if (blkif->irq)
- unbind_from_irqhandler(blkif->irq, blkif);
-
- if (blkif->blk_ring.sring) {
- unmap_frontend_page(blkif);
- free_vm_area(blkif->blk_ring_area);
- }
-
+ tap_blkif_unmap(blkif);
kmem_cache_free(blkif_cachep, blkif);
}
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Fri Sep 01 12:52:12
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Fri Sep 01 13:04:02
2006 -0600
@@ -247,6 +247,11 @@ static void tap_frontend_changed(struct
switch (frontend_state) {
case XenbusStateInitialising:
+ if (dev->state == XenbusStateClosed) {
+ printk("%s: %s: prepare for reconnect\n",
+ __FUNCTION__, dev->nodename);
+ xenbus_switch_state(dev, XenbusStateInitWait);
+ }
break;
case XenbusStateInitialised:
@@ -264,11 +269,20 @@ static void tap_frontend_changed(struct
break;
case XenbusStateClosing:
+ if (be->blkif->xenblkd) {
+ kthread_stop(be->blkif->xenblkd);
+ be->blkif->xenblkd = NULL;
+ }
+ tap_blkif_unmap(be->blkif);
xenbus_switch_state(dev, XenbusStateClosing);
break;
+ case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
case XenbusStateUnknown:
- case XenbusStateClosed:
device_unregister(&dev->dev);
break;
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep 01 12:52:12
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep 01 13:04:02
2006 -0600
@@ -228,13 +228,13 @@ static void frontend_changed(struct xenb
{
struct backend_info *be = dev->dev.driver_data;
- DPRINTK("");
+ DPRINTK("%s", xenbus_strstate(frontend_state));
be->frontend_state = frontend_state;
switch (frontend_state) {
case XenbusStateInitialising:
- if (dev->state == XenbusStateClosing) {
+ if (dev->state == XenbusStateClosed) {
printk("%s: %s: prepare for reconnect\n",
__FUNCTION__, dev->nodename);
if (be->netif) {
@@ -260,8 +260,12 @@ static void frontend_changed(struct xenb
xenbus_switch_state(dev, XenbusStateClosing);
break;
+ case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
case XenbusStateUnknown:
- case XenbusStateClosed:
if (be->netif != NULL)
kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
device_unregister(&dev->dev);
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Sep 01
12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Sep 01
13:04:02 2006 -0600
@@ -193,6 +193,7 @@ static void netfront_closing(struct xenb
static void end_access(int, void *);
static void netif_disconnect_backend(struct netfront_info *);
+static int open_netdev(struct netfront_info *);
static void close_netdev(struct netfront_info *);
static void netif_free(struct netfront_info *);
@@ -263,15 +264,22 @@ static int __devinit netfront_probe(stru
dev->dev.driver_data = info;
err = talk_to_backend(dev, info);
- if (err) {
- xennet_sysfs_delif(info->netdev);
- unregister_netdev(netdev);
- free_netdev(netdev);
- dev->dev.driver_data = NULL;
- return err;
- }
+ if (err)
+ goto fail_backend;
+
+ err = open_netdev(info);
+ if (err)
+ goto fail_open;
return 0;
+
+ fail_open:
+ xennet_sysfs_delif(info->netdev);
+ unregister_netdev(netdev);
+ fail_backend:
+ free_netdev(netdev);
+ dev->dev.driver_data = NULL;
+ return err;
}
@@ -478,7 +486,7 @@ static void backend_changed(struct xenbu
struct netfront_info *np = dev->dev.driver_data;
struct net_device *netdev = np->netdev;
- DPRINTK("\n");
+ DPRINTK("%s\n", xenbus_strstate(backend_state));
switch (backend_state) {
case XenbusStateInitialising:
@@ -1887,27 +1895,9 @@ create_netdev(int handle, int copying_re
SET_MODULE_OWNER(netdev);
SET_NETDEV_DEV(netdev, &dev->dev);
- err = register_netdev(netdev);
- if (err) {
- printk(KERN_WARNING "%s> register_netdev err=%d\n",
- __FUNCTION__, err);
- goto exit_free_rx;
- }
-
- err = xennet_sysfs_addif(netdev);
- if (err) {
- /* This can be non-fatal: it only means no tuning parameters */
- printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
- __FUNCTION__, err);
- }
-
np->netdev = netdev;
-
return netdev;
-
- exit_free_rx:
- gnttab_free_grant_references(np->gref_rx_head);
exit_free_tx:
gnttab_free_grant_references(np->gref_tx_head);
exit:
@@ -1946,11 +1936,10 @@ static void netfront_closing(struct xenb
{
struct netfront_info *info = dev->dev.driver_data;
- DPRINTK("netfront_closing: %s removed\n", dev->nodename);
+ DPRINTK("%s\n", dev->nodename);
close_netdev(info);
-
- xenbus_switch_state(dev, XenbusStateClosed);
+ xenbus_frontend_closed(dev);
}
@@ -1966,6 +1955,26 @@ static int __devexit netfront_remove(str
return 0;
}
+
+static int open_netdev(struct netfront_info *info)
+{
+ int err;
+
+ err = register_netdev(info->netdev);
+ if (err) {
+ printk(KERN_WARNING "%s: register_netdev err=%d\n",
+ __FUNCTION__, err);
+ return err;
+ }
+
+ err = xennet_sysfs_addif(info->netdev);
+ if (err) {
+ /* This can be non-fatal: it only means no tuning parameters */
+ printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
+ __FUNCTION__, err);
+ }
+ return 0;
+}
static void close_netdev(struct netfront_info *info)
{
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Fri Sep
01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Fri Sep
01 13:04:02 2006 -0600
@@ -132,4 +132,16 @@ int xenbus_unmap_ring(struct xenbus_devi
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
+int xenbus_dev_is_online(struct xenbus_device *dev)
+{
+ int rc, val;
+
+ rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
+ if (rc != 1)
+ val = 0; /* no online node present */
+
+ return val;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
+
MODULE_LICENSE("Dual BSD/GPL");
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Fri Sep 01
12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Fri Sep 01
13:04:02 2006 -0600
@@ -41,6 +41,20 @@ extern char *kasprintf(const char *fmt,
#define DPRINTK(fmt, args...) \
pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__,
##args)
+char *xenbus_strstate(enum xenbus_state state)
+{
+ static char *name[] = {
+ [ XenbusStateUnknown ] = "Unknown",
+ [ XenbusStateInitialising ] = "Initialising",
+ [ XenbusStateInitWait ] = "InitWait",
+ [ XenbusStateInitialised ] = "Initialised",
+ [ XenbusStateConnected ] = "Connected",
+ [ XenbusStateClosing ] = "Closing",
+ [ XenbusStateClosed ] = "Closed",
+ };
+ return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+
int xenbus_watch_path(struct xenbus_device *dev, const char *path,
struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *,
@@ -124,6 +138,13 @@ int xenbus_switch_state(struct xenbus_de
}
EXPORT_SYMBOL_GPL(xenbus_switch_state);
+int xenbus_frontend_closed(struct xenbus_device *dev)
+{
+ xenbus_switch_state(dev, XenbusStateClosed);
+ complete(&dev->down);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
/**
* Return the path to the error node for the given device, or NULL on failure.
diff -r 4ba098226429 -r 1bab7d65171b
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Sep 01
12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Sep 01
13:04:02 2006 -0600
@@ -73,6 +73,7 @@ static int xenbus_probe_backend(const ch
static int xenbus_dev_probe(struct device *_dev);
static int xenbus_dev_remove(struct device *_dev);
+static void xenbus_dev_shutdown(struct device *_dev);
/* If something in array of ids matches this device, return it. */
static const struct xenbus_device_id *
@@ -192,6 +193,7 @@ static struct xen_bus_type xenbus_fronte
.match = xenbus_match,
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
+ .shutdown = xenbus_dev_shutdown,
},
.dev = {
.bus_id = "xen",
@@ -246,6 +248,7 @@ static struct xen_bus_type xenbus_backen
.match = xenbus_match,
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
+// .shutdown = xenbus_dev_shutdown,
.uevent = xenbus_uevent_backend,
},
.dev = {
@@ -316,8 +319,9 @@ static void otherend_changed(struct xenb
state = xenbus_read_driver_state(dev->otherend);
- DPRINTK("state is %d, %s, %s",
- state, dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+ DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
+ dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+
if (drv->otherend_changed)
drv->otherend_changed(dev, state);
}
@@ -348,7 +352,7 @@ static int xenbus_dev_probe(struct devic
const struct xenbus_device_id *id;
int err;
- DPRINTK("");
+ DPRINTK("%s", dev->nodename);
if (!drv->probe) {
err = -ENODEV;
@@ -393,7 +397,7 @@ static int xenbus_dev_remove(struct devi
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
- DPRINTK("");
+ DPRINTK("%s", dev->nodename);
free_otherend_watch(dev);
free_otherend_details(dev);
@@ -403,6 +407,27 @@ static int xenbus_dev_remove(struct devi
xenbus_switch_state(dev, XenbusStateClosed);
return 0;
+}
+
+static void xenbus_dev_shutdown(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned long timeout = 5*HZ;
+
+ DPRINTK("%s", dev->nodename);
+
+ get_device(&dev->dev);
+ if (dev->state != XenbusStateConnected) {
+ printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
+ dev->nodename, xenbus_strstate(dev->state));
+ goto out;
+ }
+ xenbus_switch_state(dev, XenbusStateClosing);
+ timeout = wait_for_completion_timeout(&dev->down, timeout);
+ if (!timeout)
+ printk("%s: %s timeout closing device\n", __FUNCTION__,
dev->nodename);
+ out:
+ put_device(&dev->dev);
}
static int xenbus_register_driver_common(struct xenbus_driver *drv,
@@ -587,6 +612,7 @@ static int xenbus_probe_node(struct xen_
tmpstring += strlen(tmpstring) + 1;
strcpy(tmpstring, type);
xendev->devicetype = tmpstring;
+ init_completion(&xendev->down);
xendev->dev.parent = &bus->dev;
xendev->dev.bus = &bus->bus;
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Fri Sep 01 13:04:02 2006 -0600
@@ -37,6 +37,7 @@
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
+#include <linux/completion.h>
#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/xenbus.h>
@@ -74,6 +75,7 @@ struct xenbus_device {
struct xenbus_watch otherend_watch;
struct device dev;
enum xenbus_state state;
+ struct completion down;
};
static inline struct xenbus_device *to_xenbus_device(struct device *dev)
@@ -297,4 +299,8 @@ void xenbus_dev_fatal(struct xenbus_devi
int __init xenbus_dev_init(void);
+char *xenbus_strstate(enum xenbus_state state);
+int xenbus_dev_is_online(struct xenbus_device *dev);
+int xenbus_frontend_closed(struct xenbus_device *dev);
+
#endif /* _XEN_XENBUS_H */
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/drivers/block-aio.c Fri Sep 01 13:04:02 2006 -0600
@@ -52,7 +52,7 @@
*/
#define REQUEST_ASYNC_FD 1
-#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
struct pending_aio {
td_callback_t cb;
@@ -146,7 +146,7 @@ int tdaio_open (struct td_state *s, cons
struct tdaio_state *prv = (struct tdaio_state *)s->private;
s->private = prv;
- DPRINTF("XXX: block-aio open('%s')", name);
+ DPRINTF("block-aio open('%s')", name);
/* Initialize AIO */
prv->iocb_free_count = MAX_AIO_REQS;
prv->iocb_queued = 0;
@@ -156,9 +156,18 @@ int tdaio_open (struct td_state *s, cons
if (prv->poll_fd < 0) {
ret = prv->poll_fd;
- DPRINTF("Couldn't get fd for AIO poll support. This is "
- "probably because your kernel does not have the "
- "aio-poll patch applied.\n");
+ if (ret == -EAGAIN) {
+ DPRINTF("Couldn't setup AIO context. If you are "
+ "trying to concurrently use a large number "
+ "of blktap-based disks, you may need to "
+ "increase the system-wide aio request limit. "
+ "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
+ "aio-max-nr')\n");
+ } else {
+ DPRINTF("Couldn't get fd for AIO poll support. This "
+ "is probably because your kernel does not "
+ "have the aio-poll patch applied.\n");
+ }
goto done;
}
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/drivers/block-qcow.c Fri Sep 01 13:04:02 2006 -0600
@@ -51,7 +51,7 @@
/******AIO DEFINES******/
#define REQUEST_ASYNC_FD 1
#define MAX_QCOW_IDS 0xFFFF
-#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
struct pending_aio {
td_callback_t cb;
@@ -176,10 +176,21 @@ static int init_aio_state(struct td_stat
s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx);
- if (s->poll_fd < 0) {
- DPRINTF("Retrieving Async poll fd failed\n");
+ if (s->poll_fd < 0) {
+ if (s->poll_fd == -EAGAIN) {
+ DPRINTF("Couldn't setup AIO context. If you are "
+ "trying to concurrently use a large number "
+ "of blktap-based disks, you may need to "
+ "increase the system-wide aio request limit. "
+ "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
+ "aio-max-nr')\n");
+ } else {
+ DPRINTF("Couldn't get fd for AIO poll support. This "
+ "is probably because your kernel does not "
+ "have the aio-poll patch applied.\n");
+ }
goto fail;
- }
+ }
for (i=0;i<MAX_AIO_REQS;i++)
s->iocb_free[i] = &s->iocb_list[i];
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/tapdisk.c
--- a/tools/blktap/drivers/tapdisk.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/drivers/tapdisk.c Fri Sep 01 13:04:02 2006 -0600
@@ -110,6 +110,7 @@ static void unmap_disk(struct td_state *
free(s->fd_entry);
free(s->blkif);
free(s->ring_info);
+ free(s->private);
free(s);
return;
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/lib/xs_api.c
--- a/tools/blktap/lib/xs_api.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/lib/xs_api.c Fri Sep 01 13:04:02 2006 -0600
@@ -204,7 +204,7 @@ int convert_dev_name_to_num(char *name)
int convert_dev_name_to_num(char *name) {
char *p_sd, *p_hd, *p_xvd, *p_plx, *p, *alpha,*ptr;
int majors[10] = {3,22,33,34,56,57,88,89,90,91};
- int maj,i;
+ int maj,i,ret = 0;
asprintf(&p_sd,"/dev/sd");
asprintf(&p_hd,"/dev/hd");
@@ -221,7 +221,7 @@ int convert_dev_name_to_num(char *name)
*ptr++;
}
*p++;
- return BASE_DEV_VAL + (16*i) + atoi(p);
+ ret = BASE_DEV_VAL + (16*i) + atoi(p);
} else if (strstr(name, p_hd) != NULL) {
p = name + strlen(p_hd);
for (i = 0, ptr = alpha; i < strlen(alpha); i++) {
@@ -229,7 +229,7 @@ int convert_dev_name_to_num(char *name)
*ptr++;
}
*p++;
- return (majors[i/2]*256) + atoi(p);
+ ret = (majors[i/2]*256) + atoi(p);
} else if (strstr(name, p_xvd) != NULL) {
p = name + strlen(p_xvd);
@@ -238,17 +238,24 @@ int convert_dev_name_to_num(char *name)
*ptr++;
}
*p++;
- return (202*256) + (16*i) + atoi(p);
+ ret = (202*256) + (16*i) + atoi(p);
} else if (strstr(name, p_plx) != NULL) {
p = name + strlen(p_plx);
- return atoi(p);
+ ret = atoi(p);
} else {
DPRINTF("Unknown device type, setting to default.\n");
- return BASE_DEV_VAL;
- }
- return 0;
+ ret = BASE_DEV_VAL;
+ }
+
+ free(p_sd);
+ free(p_hd);
+ free(p_xvd);
+ free(p_plx);
+ free(alpha);
+
+ return ret;
}
/**
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/ia64/xc_ia64_stubs.c
--- a/tools/libxc/ia64/xc_ia64_stubs.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/libxc/ia64/xc_ia64_stubs.c Fri Sep 01 13:04:02 2006 -0600
@@ -36,7 +36,6 @@ xc_ia64_get_pfn_list(int xc_handle, uint
struct xen_domctl domctl;
int num_pfns,ret;
unsigned int __start_page, __nr_pages;
- unsigned long max_pfns;
xen_pfn_t *__pfn_buf;
__start_page = start_page;
@@ -44,27 +43,22 @@ xc_ia64_get_pfn_list(int xc_handle, uint
__pfn_buf = pfn_buf;
while (__nr_pages) {
- max_pfns = ((unsigned long)__start_page << 32) | __nr_pages;
domctl.cmd = XEN_DOMCTL_getmemlist;
- domctl.domain = (domid_t)domid;
- domctl.u.getmemlist.max_pfns = max_pfns;
+ domctl.domain = (domid_t)domid;
+ domctl.u.getmemlist.max_pfns = __nr_pages;
+ domctl.u.getmemlist.start_pfn =__start_page;
domctl.u.getmemlist.num_pfns = 0;
set_xen_guest_handle(domctl.u.getmemlist.buffer, __pfn_buf);
- if ((max_pfns != -1UL)
- && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) {
+ if (mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) {
PERROR("Could not lock pfn list buffer");
return -1;
}
ret = do_domctl(xc_handle, &domctl);
- if (max_pfns != -1UL)
- (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
+ (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
- if (max_pfns == -1UL)
- return 0;
-
num_pfns = domctl.u.getmemlist.num_pfns;
__start_page += num_pfns;
__nr_pages -= num_pfns;
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/powerpc64/xc_linux_build.c
--- a/tools/libxc/powerpc64/xc_linux_build.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/libxc/powerpc64/xc_linux_build.c Fri Sep 01 13:04:02 2006 -0600
@@ -309,7 +309,7 @@ static unsigned long create_start_info(s
si->store_evtchn = store_evtchn;
si->console.domU.mfn = si->nr_pages - 3;
si->console.domU.evtchn = console_evtchn;
- si_addr = eomem - (PAGE_SIZE * 4);
+ si_addr = (si->nr_pages - 4) << PAGE_SHIFT;
return si_addr;
}
@@ -388,7 +388,7 @@ int xc_linux_build(int xc_handle,
}
si_addr = create_start_info(&si, console_evtchn, store_evtchn);
- *console_mfn = page_array[si.console_mfn];
+ *console_mfn = page_array[si.console.domU.mfn];
*store_mfn = page_array[si.store_mfn];
if (install_image(xc_handle, domid, page_array, &si, si_addr,
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/libxc/xenctrl.h Fri Sep 01 13:04:02 2006 -0600
@@ -8,6 +8,11 @@
#ifndef XENCTRL_H
#define XENCTRL_H
+
+/* Tell the Xen public headers we are a user-space tools build. */
+#ifndef __XEN_TOOLS__
+#define __XEN_TOOLS__ 1
+#endif
#include <stddef.h>
#include <stdint.h>
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py Fri Sep 01 13:04:02 2006 -0600
@@ -161,10 +161,12 @@ def restore(xd, fd):
if handler.store_mfn is None or handler.console_mfn is None:
raise XendError('Could not read store/console MFN')
+ #Block until src closes connection
+ os.read(fd, 1)
dominfo.unpause()
-
+
dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
-
+
return dominfo
except:
dominfo.destroy()
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/XendDomain.py Fri Sep 01 13:04:02 2006 -0600
@@ -431,7 +431,8 @@ class XendDomain:
sock.send("receive\n")
sock.recv(80)
XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst)
-
+ dominfo.testDeviceComplete()
+ sock.close()
def domain_save(self, domid, dst):
"""Start saving a domain to file.
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py Fri Sep 01 13:04:02 2006 -0600
@@ -30,7 +30,6 @@ import time
import time
import threading
import os
-import math
import xen.lowlevel.xc
from xen.util import asserts
@@ -703,6 +702,9 @@ class XendDomainInfo:
if security[idx][0] == 'ssidref':
to_store['security/ssidref'] = str(security[idx][1])
+ if not self.readVm('xend/restart_count'):
+ to_store['xend/restart_count'] = str(0)
+
log.debug("Storing VM details: %s", to_store)
self.writeVm(to_store)
@@ -823,6 +825,9 @@ class XendDomainInfo:
def setResume(self, state):
self.info['resume'] = state
+
+ def getRestartCount(self):
+ return self.readVm('xend/restart_count')
def refreshShutdown(self, xeninfo = None):
# If set at the end of this method, a restart is required, with the
@@ -1280,34 +1285,28 @@ class XendDomainInfo:
for v in range(0, self.info['max_vcpu_id']+1):
xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
- # set domain maxmem in KiB
- xc.domain_setmaxmem(self.domid, self.info['maxmem'] * 1024)
-
- m = self.image.getDomainMemory(self.info['memory'] * 1024)
+ # set memory limit
+ maxmem = self.image.getRequiredMemory(self.info['maxmem'] * 1024)
+ xc.domain_setmaxmem(self.domid, maxmem)
+
+ mem_kb = self.image.getRequiredMemory(self.info['memory'] * 1024)
# get the domain's shadow memory requirement
- sm = int(math.ceil(self.image.getDomainShadowMemory(m) / 1024.0))
- if self.info['shadow_memory'] > sm:
- sm = self.info['shadow_memory']
+ shadow_kb = self.image.getRequiredShadowMemory(mem_kb)
+ shadow_kb_req = self.info['shadow_memory'] * 1024
+ if shadow_kb_req > shadow_kb:
+ shadow_kb = shadow_kb_req
+ shadow_mb = (shadow_kb + 1023) / 1024
# Make sure there's enough RAM available for the domain
- balloon.free(m + sm * 1024)
+ balloon.free(mem_kb + shadow_mb * 1024)
# Set up the shadow memory
- sm = xc.shadow_mem_control(self.domid, mb=sm)
- self.info['shadow_memory'] = sm
-
- init_reservation = self.info['memory'] * 1024
- if os.uname()[4] in ('ia64', 'ppc64'):
- # Workaround for architectures that don't yet support
- # ballooning.
- init_reservation = m
- # Following line from xiantao.zhang@xxxxxxxxx
- # Needed for IA64 until supports ballooning -- okay for PPC64?
- xc.domain_setmaxmem(self.domid, m)
-
- xc.domain_memory_increase_reservation(self.domid, init_reservation,
- 0, 0)
+ shadow_cur = xc.shadow_mem_control(self.domid, shadow_mb)
+ self.info['shadow_memory'] = shadow_cur
+
+ # initial memory allocation
+ xc.domain_memory_increase_reservation(self.domid, mem_kb, 0, 0)
self.createChannels()
@@ -1495,6 +1494,21 @@ class XendDomainInfo:
if rc != 0:
raise XendError("Device of type '%s' refuses migration." % n)
+ def testDeviceComplete(self):
+ """ For Block IO migration safety we must ensure that
+ the device has shutdown correctly, i.e. all blocks are
+ flushed to disk
+ """
+ while True:
+ test = 0
+ for i in self.getDeviceController('vbd').deviceIDs():
+ test = 1
+ log.info("Dev %s still active, looping...", i)
+ time.sleep(0.1)
+
+ if test == 0:
+ break
+
def migrateDevices(self, network, dst, step, domName=''):
"""Notify the devices about migration
"""
@@ -1615,6 +1629,9 @@ class XendDomainInfo:
try:
new_dom = XendDomain.instance().domain_create(config)
new_dom.unpause()
+ rst_cnt = self.readVm('xend/restart_count')
+ rst_cnt = int(rst_cnt) + 1
+ self.writeVm('xend/restart_count', str(rst_cnt))
new_dom.removeVm(RESTART_IN_PROGRESS)
except:
if new_dom:
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/image.py Fri Sep 01 13:04:02 2006 -0600
@@ -27,6 +27,8 @@ from xen.xend.XendLogging import log
from xen.xend.XendLogging import log
from xen.xend.server.netif import randomMAC
from xen.xend.xenstore.xswatch import xswatch
+from xen.xend import arch
+from xen.xend import FlatDeviceTree
xc = xen.lowlevel.xc.xc()
@@ -141,19 +143,10 @@ class ImageHandler:
raise VmError('Building domain failed: ostype=%s dom=%d err=%s'
% (self.ostype, self.vm.getDomid(), str(result)))
-
- def getDomainMemory(self, mem_kb):
- """@return The memory required, in KiB, by the domain to store the
- given amount, also in KiB."""
- if os.uname()[4] != 'ia64':
- # A little extra because auto-ballooning is broken w.r.t. HVM
- # guests. Also, slack is necessary for live migration since that
- # uses shadow page tables.
- if 'hvm' in xc.xeninfo()['xen_caps']:
- mem_kb += 4*1024;
+ def getRequiredMemory(self, mem_kb):
return mem_kb
- def getDomainShadowMemory(self, mem_kb):
+ def getRequiredShadowMemory(self, mem_kb):
"""@return The minimum shadow memory required, in KiB, for a domain
with mem_kb KiB of RAM."""
# PV domains don't need any shadow memory
@@ -197,9 +190,39 @@ class LinuxImageHandler(ImageHandler):
ramdisk = self.ramdisk,
features = self.vm.getFeatures())
+class PPC_LinuxImageHandler(LinuxImageHandler):
+
+ ostype = "linux"
+
+ def configure(self, imageConfig, deviceConfig):
+ LinuxImageHandler.configure(self, imageConfig, deviceConfig)
+ self.imageConfig = imageConfig
+
+ def buildDomain(self):
+ store_evtchn = self.vm.getStorePort()
+ console_evtchn = self.vm.getConsolePort()
+
+ log.debug("dom = %d", self.vm.getDomid())
+ log.debug("image = %s", self.kernel)
+ log.debug("store_evtchn = %d", store_evtchn)
+ log.debug("console_evtchn = %d", console_evtchn)
+ log.debug("cmdline = %s", self.cmdline)
+ log.debug("ramdisk = %s", self.ramdisk)
+ log.debug("vcpus = %d", self.vm.getVCpuCount())
+ log.debug("features = %s", self.vm.getFeatures())
+
+ devtree = FlatDeviceTree.build(self)
+
+ return xc.linux_build(dom = self.vm.getDomid(),
+ image = self.kernel,
+ store_evtchn = store_evtchn,
+ console_evtchn = console_evtchn,
+ cmdline = self.cmdline,
+ ramdisk = self.ramdisk,
+ features = self.vm.getFeatures(),
+ arch_args = devtree.to_bin())
+
class HVMImageHandler(ImageHandler):
-
- ostype = "hvm"
def configure(self, imageConfig, deviceConfig):
ImageHandler.configure(self, imageConfig, deviceConfig)
@@ -282,7 +305,7 @@ class HVMImageHandler(ImageHandler):
for (name, info) in deviceConfig:
if name == 'vbd':
uname = sxp.child_value(info, 'uname')
- if 'file:' in uname:
+ if uname is not None and 'file:' in uname:
(_, vbdparam) = string.split(uname, ':', 1)
if not os.path.isfile(vbdparam):
raise VmError('Disk image does not exist: %s' %
@@ -355,32 +378,6 @@ class HVMImageHandler(ImageHandler):
os.waitpid(self.pid, 0)
self.pid = 0
- def getDomainMemory(self, mem_kb):
- """@see ImageHandler.getDomainMemory"""
- if os.uname()[4] == 'ia64':
- page_kb = 16
- # ROM size for guest firmware, ioreq page and xenstore page
- extra_pages = 1024 + 2
- else:
- page_kb = 4
- # This was derived emperically:
- # 2.4 MB overhead per 1024 MB RAM + 8 MB constant
- # + 4 to avoid low-memory condition
- extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
- extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
- return mem_kb + extra_pages * page_kb
-
- def getDomainShadowMemory(self, mem_kb):
- """@return The minimum shadow memory required, in KiB, for a domain
- with mem_kb KiB of RAM."""
- if os.uname()[4] in ('ia64', 'ppc64'):
- # Explicit shadow memory is not a concept
- return 0
- else:
- # 1MB per vcpu plus 4Kib/Mib of RAM. This is higher than
- # the minimum that Xen would allocate if no value were given.
- return 1024 * self.vm.getVCpuCount() + mem_kb / 256
-
def register_shutdown_watch(self):
""" add xen store watch on control/shutdown """
self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown", \
@@ -417,15 +414,51 @@ class HVMImageHandler(ImageHandler):
return 1 # Keep watching
-"""Table of image handler classes for virtual machine images. Indexed by
-image type.
-"""
-imageHandlerClasses = {}
-
-
-for h in LinuxImageHandler, HVMImageHandler:
- imageHandlerClasses[h.ostype] = h
-
+class IA64_HVM_ImageHandler(HVMImageHandler):
+
+ ostype = "hvm"
+
+ def getRequiredMemory(self, mem_kb):
+ page_kb = 16
+ # ROM size for guest firmware, ioreq page and xenstore page
+ extra_pages = 1024 + 2
+ return mem_kb + extra_pages * page_kb
+
+ def getRequiredShadowMemory(self, mem_kb):
+ # Explicit shadow memory is not a concept
+ return 0
+
+class X86_HVM_ImageHandler(HVMImageHandler):
+
+ ostype = "hvm"
+
+ def getRequiredMemory(self, mem_kb):
+ page_kb = 4
+ # This was derived empirically:
+ # 2.4 MB overhead per 1024 MB RAM + 8 MB constant
+ # + 4 to avoid low-memory condition
+ extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
+ extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
+ return mem_kb + extra_pages * page_kb
+
+ def getRequiredShadowMemory(self, mem_kb):
+ # 1MB per vcpu plus 4KiB per MiB of RAM. This is higher than
+ # the minimum that Xen would allocate if no value were given.
+ return 1024 * self.vm.getVCpuCount() + mem_kb / 256
+
+_handlers = {
+ "powerpc": {
+ "linux": PPC_LinuxImageHandler,
+ },
+ "ia64": {
+ "linux": LinuxImageHandler,
+ "hvm": IA64_HVM_ImageHandler,
+ },
+ "x86": {
+ "linux": LinuxImageHandler,
+ "hvm": X86_HVM_ImageHandler,
+ },
+}
def findImageHandlerClass(image):
"""Find the image handler class for an image config.
@@ -433,10 +466,10 @@ def findImageHandlerClass(image):
@param image config
@return ImageHandler subclass or None
"""
- ty = sxp.name(image)
- if ty is None:
+ type = sxp.name(image)
+ if type is None:
raise VmError('missing image type')
- imageClass = imageHandlerClasses.get(ty)
- if imageClass is None:
- raise VmError('unknown image type: ' + ty)
- return imageClass
+ try:
+ return _handlers[arch.type][type]
+ except KeyError:
+ raise VmError('unknown image type: ' + type)
diff -r 4ba098226429 -r 1bab7d65171b
tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py Fri Sep 01 12:52:12
2006 -0600
+++ b/tools/python/xen/xend/server/DevController.py Fri Sep 01 13:04:02
2006 -0600
@@ -207,6 +207,9 @@ class DevController:
devid = int(devid)
+ # Modify online status /before/ updating state (latter is watched by
+ # drivers, so this ordering avoids a race).
+ self.writeBackend(devid, 'online', "0")
self.writeBackend(devid, 'state', str(xenbusState['Closing']))
@@ -406,7 +409,8 @@ class DevController:
'domain' : self.vm.getName(),
'frontend' : frontpath,
'frontend-id' : "%i" % self.vm.getDomid(),
- 'state' : str(xenbusState['Initialising'])
+ 'state' : str(xenbusState['Initialising']),
+ 'online' : "1"
})
return (backpath, frontpath)
diff -r 4ba098226429 -r 1bab7d65171b
tools/python/xen/xend/server/XMLRPCServer.py
--- a/tools/python/xen/xend/server/XMLRPCServer.py Fri Sep 01 12:52:12
2006 -0600
+++ b/tools/python/xen/xend/server/XMLRPCServer.py Fri Sep 01 13:04:02
2006 -0600
@@ -78,7 +78,8 @@ methods = ['device_create', 'device_conf
methods = ['device_create', 'device_configure', 'destroyDevice',
'getDeviceSxprs',
'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown',
- 'send_sysrq', 'getVCPUInfo', 'waitForDevices']
+ 'send_sysrq', 'getVCPUInfo', 'waitForDevices',
+ 'getRestartCount']
exclude = ['domain_create', 'domain_restore']
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/server/blkif.py Fri Sep 01 13:04:02 2006 -0600
@@ -52,10 +52,18 @@ class BlkifController(DevController):
except ValueError:
dev_type = "disk"
- try:
- (typ, params) = string.split(uname, ':', 1)
- except ValueError:
- (typ, params) = ("", "")
+ if uname is None:
+ if dev_type == 'cdrom':
+ (typ, params) = ("", "")
+ else:
+ raise VmError(
+ 'Block device must have physical details specified')
+ else:
+ try:
+ (typ, params) = string.split(uname, ':', 1)
+ except ValueError:
+ (typ, params) = ("", "")
+
back = { 'dev' : dev,
'type' : typ,
'params' : params,
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xm/migrate.py Fri Sep 01 13:04:02 2006 -0600
@@ -57,7 +57,8 @@ def main(argv):
opts.usage()
return
if len(args) != 2:
- opts.err('Invalid arguments: ' + str(args))
+ opts.usage()
+ sys.exit(1)
dom = args[0]
dst = args[1]
server.xend.domain.migrate(dom, dst, opts.vals.live, opts.vals.resource,
opts.vals.port)
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xm/shutdown.py
--- a/tools/python/xen/xm/shutdown.py Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xm/shutdown.py Fri Sep 01 13:04:02 2006 -0600
@@ -48,21 +48,48 @@ gopts.opt('reboot', short='R',
fn=set_true, default=0,
use='Shutdown and reboot.')
+def wait_reboot(opts, doms, rcs):
+ while doms:
+ alive = server.xend.domains(0)
+ reboot = []
+ for d in doms:
+ if d in alive:
+ rc = server.xend.domain.getRestartCount(d)
+ if rc == rcs[d]: continue
+ reboot.append(d)
+ else:
+ opts.info("Domain %s was destroyed instead of rebooting" % d)
+ doms.remove(d)
+ for d in reboot:
+ opts.info("Domain %s rebooted" % d)
+ doms.remove(d)
+ time.sleep(1)
+ opts.info("All domains rebooted")
+
+def wait_shutdown(opts, doms):
+ while doms:
+ alive = server.xend.domains(0)
+ dead = []
+ for d in doms:
+ if d in alive: continue
+ dead.append(d)
+ for d in dead:
+ opts.info("Domain %s terminated" % d)
+ doms.remove(d)
+ time.sleep(1)
+ opts.info("All domains terminated")
+
def shutdown(opts, doms, mode, wait):
+ rcs = {}
for d in doms:
+ rcs[d] = server.xend.domain.getRestartCount(d)
server.xend.domain.shutdown(d, mode)
+
if wait:
- while doms:
- alive = server.xend.domains(0)
- dead = []
- for d in doms:
- if d in alive: continue
- dead.append(d)
- for d in dead:
- opts.info("Domain %s terminated" % d)
- doms.remove(d)
- time.sleep(1)
- opts.info("All domains terminated")
+ if mode == 'reboot':
+ wait_reboot(opts, doms, rcs)
+ else:
+ wait_shutdown(opts, doms)
def shutdown_mode(opts):
if opts.vals.halt and opts.vals.reboot:
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/ia64/xen/dom0_ops.c Fri Sep 01 13:04:02 2006 -0600
@@ -40,8 +40,8 @@ long arch_do_domctl(xen_domctl_t *op, XE
{
unsigned long i;
struct domain *d = find_domain_by_id(op->domain);
- unsigned long start_page = op->u.getmemlist.max_pfns >> 32;
- unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff;
+ unsigned long start_page = op->u.getmemlist.start_pfn;
+ unsigned long nr_pages = op->u.getmemlist.max_pfns;
unsigned long mfn;
if ( d == NULL ) {
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/Makefile
--- a/xen/arch/powerpc/Makefile Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/Makefile Fri Sep 01 13:04:02 2006 -0600
@@ -6,6 +6,7 @@ subdir-y += papr
subdir-y += papr
obj-y += audit.o
+obj-y += backtrace.o
obj-y += bitops.o
obj-y += boot_of.o
obj-y += dart.o
@@ -19,19 +20,21 @@ obj-y += external.o
obj-y += external.o
obj-y += float.o
obj-y += hcalls.o
-obj-y += htab.o
obj-y += iommu.o
obj-y += irq.o
obj-y += mambo.o
+obj-y += memory.o
obj-y += mm.o
obj-y += mpic.o
obj-y += mpic_init.o
obj-y += of-devtree.o
obj-y += of-devwalk.o
obj-y += ofd_fixup.o
+obj-y += ofd_fixup_memory.o
obj-y += physdev.o
obj-y += rtas.o
obj-y += setup.o
+obj-y += shadow.o
obj-y += smp.o
obj-y += time.o
obj-y += usercopy.o
@@ -47,6 +50,7 @@ obj-y += elf32.o
# These are extra warnings like for the arch/ppc directory but may not
# allow the rest of the tree to build.
PPC_C_WARNINGS += -Wundef -Wmissing-prototypes -Wmissing-declarations
+PPC_C_WARNINGS += -Wshadow
CFLAGS += $(PPC_C_WARNINGS)
LINK=0x400000
@@ -91,8 +95,27 @@ start.o: boot/start.S
start.o: boot/start.S
$(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
-$(TARGET)-syms: start.o $(ALL_OBJS) xen.lds
- $(CC) $(CFLAGS) $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds
start.o $(ALL_OBJS) -o $@
+TARGET_OPTS = $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds
+TARGET_OPTS += start.o $(ALL_OBJS)
+
+.xen-syms: start.o $(ALL_OBJS) xen.lds
+ $(CC) $(CFLAGS) $(TARGET_OPTS) -o $@
+
+NM=$(CROSS_COMPILE)nm
+new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null;
then echo y; else echo n; fi)
+
+ifeq ($(new_nm),y)
+NM := $(NM) --synthetic
+endif
+
+xen-syms.S: .xen-syms
+ $(NM) -n $^ | $(BASEDIR)/tools/symbols > $@
+
+xen-syms.o: xen-syms.S
+ $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
+
+$(TARGET)-syms: start.o $(ALL_OBJS) xen-syms.o xen.lds
+ $(CC) $(CFLAGS) $(TARGET_OPTS) xen-syms.o -o $@
$(TARGET).bin: $(TARGET)-syms
$(CROSS_COMPILE)objcopy --output-target=binary $< $@
@@ -122,4 +145,4 @@ dom0.bin: $(DOM0_IMAGE)
clean::
$(MAKE) -f $(BASEDIR)/Rules.mk -C of_handler clean
- rm -f firmware firmware_image dom0.bin
+ rm -f firmware firmware_image dom0.bin .xen-syms
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/Rules.mk
--- a/xen/arch/powerpc/Rules.mk Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/Rules.mk Fri Sep 01 13:04:02 2006 -0600
@@ -4,7 +4,7 @@ LD := $(CROSS_COMPILE)ld
LD := $(CROSS_COMPILE)ld
# These are goodess that applies to all source.
-C_WARNINGS := -Wpointer-arith -Wredundant-decls
+C_WARNINGS := -Wredundant-decls
# _no_ common code can have packed data structures or we are in touble.
C_WARNINGS += -Wpacked
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/boot_of.c
--- a/xen/arch/powerpc/boot_of.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/boot_of.c Fri Sep 01 13:04:02 2006 -0600
@@ -26,10 +26,14 @@
#include <xen/spinlock.h>
#include <xen/serial.h>
#include <xen/time.h>
+#include <xen/sched.h>
#include <asm/page.h>
#include <asm/io.h>
#include "exceptions.h"
#include "of-devtree.h"
+
+/* Secondary processors use this for handshaking with main processor. */
+volatile unsigned int __spin_ack;
static ulong of_vec;
static ulong of_msr;
@@ -322,17 +326,18 @@ static void __init of_test(const char *o
}
}
-static int __init of_claim(void * virt, u32 size)
+static int __init of_claim(u32 virt, u32 size, u32 align)
{
int rets[1] = { OF_FAILURE };
- of_call("claim", 3, 1, rets, virt, size, 0/*align*/);
+ of_call("claim", 3, 1, rets, virt, size, align);
if (rets[0] == OF_FAILURE) {
- DBG("%s 0x%p 0x%08x -> FAIL\n", __func__, virt, size);
+ DBG("%s 0x%08x 0x%08x 0x%08x -> FAIL\n", __func__, virt, size, align);
return OF_FAILURE;
}
- DBG("%s 0x%p 0x%08x -> 0x%x\n", __func__, virt, size, rets[0]);
+ DBG("%s 0x%08x 0x%08x 0x%08x -> 0x%08x\n", __func__, virt, size, align,
+ rets[0]);
return rets[0];
}
@@ -683,32 +688,53 @@ static int boot_of_fixup_chosen(void *me
}
static ulong space_base;
-static ulong find_space(u32 size, ulong align, multiboot_info_t *mbi)
+
+/*
+ * The following function is necessary because we cannot depend on all
+ * FW to actually allocate us any space, so we look for it _hoping_
+ * that at least is will fail if we try to claim something that
+ * belongs to FW. This hope does not seem to be true on some version
+ * of PIBS.
+ */
+static ulong find_space(u32 size, u32 align, multiboot_info_t *mbi)
{
memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr);
ulong eomem = ((u64)map->length_high << 32) | (u64)map->length_low;
ulong base;
- of_printf("%s base=0x%016lx eomem=0x%016lx size=0x%08x align=0x%lx\n",
+ if (size == 0)
+ return 0;
+
+ if (align == 0)
+ of_panic("cannot call %s() with align of 0\n", __func__);
+
+#ifdef BROKEN_CLAIM_WORKAROUND
+ {
+ static int broken_claim;
+ if (!broken_claim) {
+ /* just try and claim it to the FW chosen address */
+ base = of_claim(0, size, align);
+ if (base != OF_FAILURE)
+ return base;
+ of_printf("%s: Firmware does not allocate memory for you\n",
+ __func__);
+ broken_claim = 1;
+ }
+ }
+#endif
+
+ of_printf("%s base=0x%016lx eomem=0x%016lx size=0x%08x align=0x%x\n",
__func__, space_base, eomem, size, align);
base = ALIGN_UP(space_base, PAGE_SIZE);
- if ((base + size) >= 0x4000000) return 0;
- if (base + size > eomem) of_panic("not enough RAM\n");
-
- if (size == 0) return base;
- if (of_claim((void*)base, size) != OF_FAILURE) {
- space_base = base + size;
- return base;
- } else {
- for(base += 0x100000; (base+size) < 0x4000000; base += 0x100000) {
- of_printf("Trying 0x%016lx\n", base);
- if (of_claim((void*)base, size) != OF_FAILURE) {
- space_base = base + size;
- return base;
- }
- }
- return 0;
- }
+
+ while ((base + size) < rma_size(cpu_default_rma_order_pages())) {
+ if (of_claim(base, size, 0) != OF_FAILURE) {
+ space_base = base + size;
+ return base;
+ }
+ base += (PAGE_SIZE > align) ? PAGE_SIZE : align;
+ }
+ of_panic("Cannot find memory in the RMA\n");
}
/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges
@@ -834,9 +860,8 @@ static void boot_of_module(ulong r3, ulo
static module_t mods[3];
void *oftree;
ulong oftree_sz = 48 * PAGE_SIZE;
- char *mod0_start;
+ ulong mod0_start;
ulong mod0_size;
- ulong mod0;
static const char sepr[] = " -- ";
extern char dom0_start[] __attribute__ ((weak));
extern char dom0_size[] __attribute__ ((weak));
@@ -844,59 +869,48 @@ static void boot_of_module(ulong r3, ulo
if ((r3 > 0) && (r4 > 0)) {
/* was it handed to us in registers ? */
- mod0_start = (void *)r3;
+ mod0_start = r3;
mod0_size = r4;
+ of_printf("%s: Dom0 was loaded and found using r3/r4:"
+ "0x%lx[size 0x%lx]\n",
+ __func__, mod0_start, mod0_size);
} else {
/* see if it is in the boot params */
p = strstr((char *)((ulong)mbi->cmdline), "dom0_start=");
if ( p != NULL) {
p += 11;
- mod0_start = (char *)simple_strtoul(p, NULL, 0);
+ mod0_start = simple_strtoul(p, NULL, 0);
p = strstr((char *)((ulong)mbi->cmdline), "dom0_size=");
p += 10;
mod0_size = simple_strtoul(p, NULL, 0);
-
- of_printf("mod0: %o %c %c %c\n",
- mod0_start[0],
- mod0_start[1],
- mod0_start[2],
- mod0_start[3]);
-
+ of_printf("%s: Dom0 was loaded and found using cmdline:"
+ "0x%lx[size 0x%lx]\n",
+ __func__, mod0_start, mod0_size);
} else if ( ((ulong)dom0_start != 0) && ((ulong)dom0_size != 0) ) {
/* was it linked in ? */
- mod0_start = dom0_start;
+ mod0_start = (ulong)dom0_start;
mod0_size = (ulong)dom0_size;
- of_printf("%s: linked in module copied after _end "
- "(start 0x%p size 0x%lx)\n",
+ of_printf("%s: Dom0 is linked in: 0x%lx[size 0x%lx]\n",
__func__, mod0_start, mod0_size);
} else {
- mod0_start = _end;
+ mod0_start = (ulong)_end;
mod0_size = 0;
- }
+ of_printf("%s: FYI Dom0 is unknown, will be caught later\n",
+ __func__);
+ }
+ }
+
+ if (mod0_size > 0) {
+ const char *c = (const char *)mod0_start;
+
+ of_printf("mod0: %o %c %c %c\n", c[0], c[1], c[2], c[3]);
}
space_base = (ulong)_end;
- mod0 = find_space(mod0_size, PAGE_SIZE, mbi);
-
- /* three cases
- * 1) mod0_size is not 0 and the image can be copied
- * 2) mod0_size is not 0 and the image cannot be copied
- * 3) mod0_size is 0
- */
- if (mod0_size > 0) {
- if (mod0 != 0) {
- memcpy((void *)mod0, mod0_start, mod0_size);
- mods[0].mod_start = mod0;
- mods[0].mod_end = mod0 + mod0_size;
- } else {
- of_panic("No space to copy mod0\n");
- }
- } else {
- mods[0].mod_start = mod0;
- mods[0].mod_end = mod0;
- }
+ mods[0].mod_start = mod0_start;
+ mods[0].mod_end = mod0_start + mod0_size;
of_printf("%s: mod[0] @ 0x%016x[0x%x]\n", __func__,
mods[0].mod_start, mods[0].mod_end);
@@ -909,15 +923,22 @@ static void boot_of_module(ulong r3, ulo
/* snapshot the tree */
oftree = (void*)find_space(oftree_sz, PAGE_SIZE, mbi);
- if (oftree == 0) of_panic("Could not allocate OFD tree\n");
+ if (oftree == 0)
+ of_panic("Could not allocate OFD tree\n");
of_printf("creating oftree\n");
of_test("package-to-path");
- ofd_create(oftree, oftree_sz);
+ oftree = ofd_create(oftree, oftree_sz);
pkg_save(oftree);
+
+ if (ofd_size(oftree) > oftree_sz)
+ of_panic("Could not fit all of native devtree\n");
boot_of_fixup_refs(oftree);
boot_of_fixup_chosen(oftree);
+
+ if (ofd_size(oftree) > oftree_sz)
+ of_panic("Could not fit all devtree fixups\n");
ofd_walk(oftree, OFD_ROOT, /* add_hype_props */ NULL, 2);
@@ -937,7 +958,7 @@ static int __init boot_of_cpus(void)
static int __init boot_of_cpus(void)
{
int cpus;
- int cpu;
+ int cpu, bootcpu, logical;
int result;
u32 cpu_clock[2];
@@ -962,10 +983,68 @@ static int __init boot_of_cpus(void)
cpu_khz /= 1000;
of_printf("OF: clock-frequency = %ld KHz\n", cpu_khz);
- /* FIXME: should not depend on the boot CPU bring the first child */
+ /* Look up which CPU we are running on right now. */
+ result = of_getprop(bof_chosen, "cpu", &bootcpu, sizeof (bootcpu));
+ if (result == OF_FAILURE)
+ of_panic("Failed to look up boot cpu\n");
+
cpu = of_getpeer(cpu);
- while (cpu > 0) {
- of_start_cpu(cpu, (ulong)spin_start, 0);
+
+ /* We want a continuous logical cpu number space. */
+ cpu_set(0, cpu_present_map);
+ cpu_set(0, cpu_online_map);
+ cpu_set(0, cpu_possible_map);
+
+ /* Spin up all CPUS, even if there are more than NR_CPUS, because
+ * Open Firmware has them spinning on cache lines which will
+ * eventually be scrubbed, which could lead to random CPU activation.
+ */
+ for (logical = 1; cpu > 0; logical++) {
+ unsigned int cpuid, ping, pong;
+ unsigned long now, then, timeout;
+
+ if (cpu == bootcpu) {
+ of_printf("skipping boot cpu!\n");
+ continue;
+ }
+
+ result = of_getprop(cpu, "reg", &cpuid, sizeof(cpuid));
+ if (result == OF_FAILURE)
+ of_panic("cpuid lookup failed\n");
+
+ of_printf("spinning up secondary processor #%d: ", logical);
+
+ __spin_ack = ~0x0;
+ ping = __spin_ack;
+ pong = __spin_ack;
+ of_printf("ping = 0x%x: ", ping);
+
+ mb();
+ result = of_start_cpu(cpu, (ulong)spin_start, logical);
+ if (result == OF_FAILURE)
+ of_panic("start cpu failed\n");
+
+ /* We will give the secondary processor five seconds to reply. */
+ then = mftb();
+ timeout = then + (5 * timebase_freq);
+
+ do {
+ now = mftb();
+ if (now >= timeout) {
+ of_printf("BROKEN: ");
+ break;
+ }
+
+ mb();
+ pong = __spin_ack;
+ } while (pong == ping);
+ of_printf("pong = 0x%x\n", pong);
+
+ if (pong != ping) {
+ cpu_set(logical, cpu_present_map);
+ cpu_set(logical, cpu_possible_map);
+ }
+
cpu = of_getpeer(cpu);
}
return 1;
@@ -1013,6 +1092,7 @@ multiboot_info_t __init *boot_of_init(
boot_of_rtas();
/* end of OF */
+ of_printf("Quiescing Open Firmware ...\n");
of_call("quiesce", 0, 0, NULL);
return &mbi;
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/dart_u3.c
--- a/xen/arch/powerpc/dart_u3.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/dart_u3.c Fri Sep 01 13:04:02 2006 -0600
@@ -55,10 +55,10 @@ static void u3_inv_all(void)
dc.reg.dc_invtlb = 1;
out_32(dart_ctl_reg, dc.dc_word);
- do {
- dc.dc_word = in_32(dart_ctl_reg);
- r++;
- } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l)));
+ do {
+ dc.dc_word = in_32(dart_ctl_reg);
+ r++;
+ } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l)));
if (r == (1 << l)) {
if (l < 4) {
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/dom0_ops.c
--- a/xen/arch/powerpc/dom0_ops.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/dom0_ops.c Fri Sep 01 13:04:02 2006 -0600
@@ -23,16 +23,20 @@
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
+#include <xen/shadow.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/sysctl.h>
+void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *);
void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c)
{
memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs));
/* XXX fill in rest of vcpu_guest_context_t */
}
+long arch_do_domctl(struct xen_domctl *domctl,
+ XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
long arch_do_domctl(struct xen_domctl *domctl,
XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
{
@@ -75,6 +79,19 @@ long arch_do_domctl(struct xen_domctl *d
}
}
break;
+ case XEN_DOMCTL_shadow_op:
+ {
+ struct domain *d;
+ ret = -ESRCH;
+ d = find_domain_by_id(domctl->domain);
+ if ( d != NULL )
+ {
+ ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl);
+ put_domain(d);
+ copy_to_guest(u_domctl, domctl, 1);
+ }
+ }
+ break;
default:
ret = -ENOSYS;
@@ -84,6 +101,8 @@ long arch_do_domctl(struct xen_domctl *d
return ret;
}
+long arch_do_sysctl(struct xen_sysctl *sysctl,
+ XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl);
long arch_do_sysctl(struct xen_sysctl *sysctl,
XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
{
@@ -109,6 +128,7 @@ long arch_do_sysctl(struct xen_sysctl *s
break;
default:
+ printk("%s: unsupported sysctl: 0x%x\n", __func__, (sysctl->cmd));
ret = -ENOSYS;
break;
}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/domain.c Fri Sep 01 13:04:02 2006 -0600
@@ -27,6 +27,8 @@
#include <xen/domain.h>
#include <xen/console.h>
#include <xen/shutdown.h>
+#include <xen/shadow.h>
+#include <xen/mm.h>
#include <asm/htab.h>
#include <asm/current.h>
#include <asm/hcalls.h>
@@ -75,7 +77,8 @@ int arch_domain_create(struct domain *d)
{
unsigned long rma_base;
unsigned long rma_sz;
- uint htab_order;
+ uint rma_order_pages;
+ int rc;
if (d->domain_id == IDLE_DOMAIN_ID) {
d->shared_info = (void *)alloc_xenheap_page();
@@ -84,44 +87,31 @@ int arch_domain_create(struct domain *d)
return 0;
}
- d->arch.rma_order = cpu_rma_order();
- rma_sz = rma_size(d->arch.rma_order);
-
/* allocate the real mode area */
- d->max_pages = 1UL << d->arch.rma_order;
+ rma_order_pages = cpu_default_rma_order_pages();
+ d->max_pages = 1UL << rma_order_pages;
d->tot_pages = 0;
- d->arch.rma_page = alloc_domheap_pages(d, d->arch.rma_order, 0);
- if (NULL == d->arch.rma_page)
- return 1;
+
+ rc = allocate_rma(d, rma_order_pages);
+ if (rc)
+ return rc;
rma_base = page_to_maddr(d->arch.rma_page);
-
- BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */
-
- printk("clearing RMO: 0x%lx[0x%lx]\n", rma_base, rma_sz);
- memset((void *)rma_base, 0, rma_sz);
+ rma_sz = rma_size(rma_order_pages);
d->shared_info = (shared_info_t *)
(rma_addr(&d->arch, RMA_SHARED_INFO) + rma_base);
- d->arch.large_page_sizes = 1;
- d->arch.large_page_shift[0] = 24; /* 16 M for 970s */
-
- /* FIXME: we need to the the maximum addressible memory for this
- * domain to calculate this correctly. It should probably be set
- * by the managment tools */
- htab_order = d->arch.rma_order - 6; /* (1/64) */
- if (test_bit(_DOMF_privileged, &d->domain_flags)) {
- /* bump the htab size of privleged domains */
- ++htab_order;
- }
- htab_alloc(d, htab_order);
+ d->arch.large_page_sizes = cpu_large_page_orders(
+ d->arch.large_page_order, ARRAY_SIZE(d->arch.large_page_order));
+
+ INIT_LIST_HEAD(&d->arch.extent_list);
return 0;
}
void arch_domain_destroy(struct domain *d)
{
- htab_free(d);
+ shadow_teardown(d);
}
void machine_halt(void)
@@ -162,6 +152,16 @@ int arch_set_info_guest(struct vcpu *v,
int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c)
{
memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs));
+
+ printf("Domain[%d].%d: initializing\n",
+ v->domain->domain_id, v->vcpu_id);
+
+ if (v->domain->arch.htab.order == 0)
+ panic("Page table never allocated for Domain: %d\n",
+ v->domain->domain_id);
+ if (v->domain->arch.rma_order == 0)
+ panic("RMA never allocated for Domain: %d\n",
+ v->domain->domain_id);
set_bit(_VCPUF_initialised, &v->vcpu_flags);
@@ -253,17 +253,19 @@ void continue_running(struct vcpu *same)
void continue_running(struct vcpu *same)
{
/* nothing to do */
+ return;
}
void sync_vcpu_execstate(struct vcpu *v)
{
- /* XXX for now, for domain destruction, make this non-fatal */
- printf("%s: called\n", __func__);
+ /* do nothing */
+ return;
}
void domain_relinquish_resources(struct domain *d)
{
free_domheap_pages(d->arch.rma_page, d->arch.rma_order);
+ free_extents(d);
}
void arch_dump_domain_info(struct domain *d)
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/domain_build.c
--- a/xen/arch/powerpc/domain_build.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/domain_build.c Fri Sep 01 13:04:02 2006 -0600
@@ -25,6 +25,7 @@
#include <xen/init.h>
#include <xen/ctype.h>
#include <xen/iocap.h>
+#include <xen/shadow.h>
#include <xen/version.h>
#include <asm/processor.h>
#include <asm/papr.h>
@@ -34,17 +35,21 @@ extern int loadelfimage_32(struct domain
extern int loadelfimage_32(struct domain_setup_info *dsi);
/* opt_dom0_mem: memory allocated to domain 0. */
-static unsigned int opt_dom0_mem;
+static unsigned int dom0_nrpages;
static void parse_dom0_mem(char *s)
{
- unsigned long long bytes = parse_size_and_unit(s);
- /* If no unit is specified we default to kB units, not bytes. */
- if (isdigit(s[strlen(s)-1]))
- opt_dom0_mem = (unsigned int)bytes;
- else
- opt_dom0_mem = (unsigned int)(bytes >> 10);
+ unsigned long long bytes;
+
+ bytes = parse_size_and_unit(s);
+ dom0_nrpages = bytes >> PAGE_SHIFT;
}
custom_param("dom0_mem", parse_dom0_mem);
+
+static unsigned int opt_dom0_max_vcpus;
+integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
+
+static unsigned int opt_dom0_shadow;
+boolean_param("dom0_shadow", opt_dom0_shadow);
int elf_sanity_check(Elf_Ehdr *ehdr)
{
@@ -105,11 +110,13 @@ int construct_dom0(struct domain *d,
struct domain_setup_info dsi;
ulong dst;
u64 *ofh_tree;
+ uint rma_nrpages = 1 << d->arch.rma_order;
ulong rma_sz = rma_size(d->arch.rma_order);
ulong rma = page_to_maddr(d->arch.rma_page);
start_info_t *si;
ulong eomem;
int am64 = 1;
+ int preempt = 0;
ulong msr;
ulong pc;
ulong r2;
@@ -118,13 +125,18 @@ int construct_dom0(struct domain *d,
BUG_ON(d->domain_id != 0);
BUG_ON(d->vcpu[0] == NULL);
+ if (image_len == 0)
+ panic("No Dom0 image supplied\n");
+
cpu_init_vcpu(v);
memset(&dsi, 0, sizeof(struct domain_setup_info));
dsi.image_addr = image_start;
dsi.image_len = image_len;
+ printk("Trying Dom0 as 64bit ELF\n");
if ((rc = parseelfimage(&dsi)) != 0) {
+ printk("Trying Dom0 as 32bit ELF\n");
if ((rc = parseelfimage_32(&dsi)) != 0)
return rc;
am64 = 0;
@@ -141,7 +153,33 @@ int construct_dom0(struct domain *d,
/* By default DOM0 is allocated all available memory. */
d->max_pages = ~0U;
- d->tot_pages = 1UL << d->arch.rma_order;
+
+ /* default is the max(1/16th of memory, CONFIG_MIN_DOM0_PAGES) */
+ if (dom0_nrpages == 0) {
+ dom0_nrpages = total_pages >> 4;
+
+ if (dom0_nrpages < CONFIG_MIN_DOM0_PAGES)
+ dom0_nrpages = CONFIG_MIN_DOM0_PAGES;
+ }
+
+ /* make sure we are at least as big as the RMA */
+ if (dom0_nrpages > rma_nrpages)
+ dom0_nrpages = allocate_extents(d, dom0_nrpages, rma_nrpages);
+
+ ASSERT(d->tot_pages == dom0_nrpages);
+ ASSERT(d->tot_pages >= rma_nrpages);
+
+ if (opt_dom0_shadow == 0) {
+ /* 1/64 of memory */
+ opt_dom0_shadow = (d->tot_pages >> 6) >> (20 - PAGE_SHIFT);
+ }
+
+ do {
+ shadow_set_allocation(d, opt_dom0_shadow, &preempt);
+ } while (preempt);
+ if (shadow_get_allocation(d) == 0)
+ panic("shadow allocation failed 0x%x < 0x%x\n",
+ shadow_get_allocation(d), opt_dom0_shadow);
ASSERT( image_len < rma_sz );
@@ -156,10 +194,6 @@ int construct_dom0(struct domain *d,
printk("shared_info: 0x%lx,%p\n", si->shared_info, d->shared_info);
eomem = si->shared_info;
-
- /* allow dom0 to access all of system RAM */
- d->arch.logical_base_pfn = 128 << (20 - PAGE_SHIFT); /* 128 MB */
- d->arch.logical_end_pfn = max_page;
/* number of pages accessible */
si->nr_pages = rma_sz >> PAGE_SHIFT;
@@ -265,7 +299,7 @@ int construct_dom0(struct domain *d,
printk("DOM: pc = 0x%lx, r2 = 0x%lx\n", pc, r2);
- ofd_dom0_fixup(d, *ofh_tree + rma, si, dst - rma);
+ ofd_dom0_fixup(d, *ofh_tree + rma, si);
set_bit(_VCPUF_initialised, &v->vcpu_flags);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/exceptions.c
--- a/xen/arch/powerpc/exceptions.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/exceptions.c Fri Sep 01 13:04:02 2006 -0600
@@ -82,6 +82,8 @@ void program_exception(struct cpu_user_r
show_registers(regs);
printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr());
printk("hid4 0x%016lx\n", regs->hid4);
+ printk("---[ backtrace ]---\n");
+ show_backtrace(regs->gprs[1], regs->lr, regs->pc);
panic("%s: 0x%lx\n", __func__, cookie);
#endif /* CRASH_DEBUG */
}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/exceptions.h
--- a/xen/arch/powerpc/exceptions.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/exceptions.h Fri Sep 01 13:04:02 2006 -0600
@@ -51,7 +51,4 @@ extern char exception_vectors[];
extern char exception_vectors[];
extern char exception_vectors_end[];
extern int spin_start[];
-extern int firmware_image_start[0];
-extern int firmware_image_size[0];
-
#endif
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/external.c Fri Sep 01 13:04:02 2006 -0600
@@ -175,8 +175,7 @@ void init_IRQ(void)
void ack_APIC_irq(void)
{
- printk("%s: EOI the whole MPIC?\n", __func__);
- for (;;);
+ panic("%s: EOI the whole MPIC?\n", __func__);
}
void ack_bad_irq(unsigned int irq)
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/iommu.c
--- a/xen/arch/powerpc/iommu.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/iommu.c Fri Sep 01 13:04:02 2006 -0600
@@ -52,17 +52,14 @@ int iommu_put(u32 buid, ulong ioba, unio
pfn = tce.tce_bits.tce_rpn;
mfn = pfn2mfn(d, pfn, &mtype);
- if (mtype != 0) {
- panic("we don't do non-RMO memory yet\n");
+ if (mfn > 0) {
+#ifdef DEBUG
+ printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
+ ioba, pfn, mfn);
+#endif
+ tce.tce_bits.tce_rpn = mfn;
+ return iommu_phbs[buid].iommu_put(ioba, tce);
}
-
-#ifdef DEBUG
- printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
- ioba, pfn, mfn);
-#endif
- tce.tce_bits.tce_rpn = mfn;
-
- return iommu_phbs[buid].iommu_put(ioba, tce);
}
return -1;
}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/mm.c Fri Sep 01 13:04:02 2006 -0600
@@ -13,9 +13,10 @@
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
*
* Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ * Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
*/
#include <xen/config.h>
@@ -23,9 +24,18 @@
#include <xen/shadow.h>
#include <xen/kernel.h>
#include <xen/sched.h>
+#include <xen/perfc.h>
#include <asm/misc.h>
#include <asm/init.h>
#include <asm/page.h>
+
+#ifdef VERBOSE
+#define MEM_LOG(_f, _a...) \
+ printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
+ current->domain->domain_id , __LINE__ , ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
/* Frame table and its size in pages. */
struct page_info *frame_table;
@@ -53,16 +63,128 @@ int steal_page(struct domain *d, struct
return 1;
}
-
-int get_page_type(struct page_info *page, u32 type)
-{
- panic("%s called\n", __func__);
- return 1;
-}
-
void put_page_type(struct page_info *page)
{
- panic("%s called\n", __func__);
+ unsigned long nx, x, y = page->u.inuse.type_info;
+
+ do {
+ x = y;
+ nx = x - 1;
+
+ ASSERT((x & PGT_count_mask) != 0);
+
+ /*
+ * The page should always be validated while a reference is held. The
+ * exception is during domain destruction, when we forcibly invalidate
+ * page-table pages if we detect a referential loop.
+ * See domain.c:relinquish_list().
+ */
+ ASSERT((x & PGT_validated) ||
+ test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
+
+ if ( unlikely((nx & PGT_count_mask) == 0) )
+ {
+ /* Record TLB information for flush later. */
+ page->tlbflush_timestamp = tlbflush_current_time();
+ }
+ else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) ==
+ (PGT_pinned | 1)) )
+ {
+ /* Page is now only pinned. Make the back pointer mutable again. */
+ nx |= PGT_va_mutable;
+ }
+ }
+ while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+}
+
+
+int get_page_type(struct page_info *page, unsigned long type)
+{
+ unsigned long nx, x, y = page->u.inuse.type_info;
+
+ again:
+ do {
+ x = y;
+ nx = x + 1;
+ if ( unlikely((nx & PGT_count_mask) == 0) )
+ {
+ MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
+ return 0;
+ }
+ else if ( unlikely((x & PGT_count_mask) == 0) )
+ {
+ if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
+ {
+ if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
+ {
+ /*
+ * On type change we check to flush stale TLB
+ * entries. This may be unnecessary (e.g., page
+ * was GDT/LDT) but those circumstances should be
+ * very rare.
+ */
+ cpumask_t mask =
+ page_get_owner(page)->domain_dirty_cpumask;
+ tlbflush_filter(mask, page->tlbflush_timestamp);
+
+ if ( unlikely(!cpus_empty(mask)) )
+ {
+ perfc_incrc(need_flush_tlb_flush);
+ flush_tlb_mask(mask);
+ }
+ }
+
+ /* We lose existing type, back pointer, and validity. */
+ nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
+ nx |= type;
+
+ /* No special validation needed for writable pages. */
+ /* Page tables and GDT/LDT need to be scanned for validity. */
+ if ( type == PGT_writable_page )
+ nx |= PGT_validated;
+ }
+ }
+ else
+ {
+ if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+ {
+ if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+ {
+ return 0;
+ }
+ else if ( (x & PGT_va_mask) == PGT_va_mutable )
+ {
+ /* The va backpointer is mutable, hence we update it. */
+ nx &= ~PGT_va_mask;
+ nx |= type; /* we know the actual type is correct */
+ }
+ else if ( (type & PGT_va_mask) != PGT_va_mutable )
+ {
+ ASSERT((type & PGT_va_mask) != (x & PGT_va_mask));
+
+ /* This table is possibly mapped at multiple locations. */
+ nx &= ~PGT_va_mask;
+ nx |= PGT_va_unknown;
+ }
+ }
+ if ( unlikely(!(x & PGT_validated)) )
+ {
+ /* Someone else is updating validation of this page. Wait... */
+ while ( (y = page->u.inuse.type_info) == x )
+ cpu_relax();
+ goto again;
+ }
+ }
+ }
+ while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+
+ if ( unlikely(!(nx & PGT_validated)) )
+ {
+ /* No one else is updating simultaneously. */
+ __set_bit(_PGT_validated, &page->u.inuse.type_info);
+ }
+
+ return 1;
}
void __init init_frametable(void)
@@ -107,44 +229,148 @@ extern void copy_page(void *dp, void *sp
}
}
+static int mfn_in_hole(ulong mfn)
+{
+ /* totally cheating */
+ if (mfn >= (0xf0000000UL >> PAGE_SHIFT) &&
+ mfn < (((1UL << 32) - 1) >> PAGE_SHIFT))
+ return 1;
+
+ return 0;
+}
+
+static uint add_extent(struct domain *d, struct page_info *pg, uint order)
+{
+ struct page_extents *pe;
+
+ pe = xmalloc(struct page_extents);
+ if (pe == NULL)
+ return 0;
+
+ pe->pg = pg;
+ pe->order = order;
+ pe->pfn = page_to_mfn(pg);
+
+ list_add_tail(&pe->pe_list, &d->arch.extent_list);
+
+ return pe->pfn;
+}
+
+void free_extents(struct domain *d)
+{
+ /* we just need to free the memory behind list */
+ struct list_head *list;
+ struct list_head *ent;
+ struct list_head *next;
+
+ list = &d->arch.extent_list;
+ ent = list->next;
+
+ while (ent != list) {
+ next = ent->next;
+ xfree(ent);
+ ent = next;
+ }
+}
+
+uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages)
+{
+ uint ext_order;
+ uint ext_nrpages;
+ uint total_nrpages;
+ struct page_info *pg;
+
+ ext_order = cpu_extent_order();
+ ext_nrpages = 1 << ext_order;
+
+ total_nrpages = rma_nrpages;
+
+ /* We only allocate in ext_nrpages-sized chunks, so if the request is not
+ * evenly divisible the domain gets more than it asked for */
+ while (total_nrpages < nrpages) {
+ pg = alloc_domheap_pages(d, ext_order, 0);
+ if (pg == NULL)
+ return total_nrpages;
+
+ if (add_extent(d, pg, ext_order) == 0) {
+ free_domheap_pages(pg, ext_order);
+ return total_nrpages;
+ }
+ total_nrpages += ext_nrpages;
+ }
+
+ return total_nrpages;
+}
+
+int allocate_rma(struct domain *d, unsigned int order_pages)
+{
+ ulong rma_base;
+ ulong rma_sz = rma_size(order_pages);
+
+ d->arch.rma_page = alloc_domheap_pages(d, order_pages, 0);
+ if (d->arch.rma_page == NULL) {
+ DPRINTK("Could not allocate order_pages=%d RMA for domain %u\n",
+ order_pages, d->domain_id);
+ return -ENOMEM;
+ }
+ d->arch.rma_order = order_pages;
+
+ rma_base = page_to_maddr(d->arch.rma_page);
+ BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */
+
+ /* XXX */
+ printk("clearing RMA: 0x%lx[0x%lx]\n", rma_base, rma_sz);
+ memset((void *)rma_base, 0, rma_sz);
+
+ return 0;
+}
+
ulong pfn2mfn(struct domain *d, long pfn, int *type)
{
ulong rma_base_mfn = page_to_mfn(d->arch.rma_page);
ulong rma_size_mfn = 1UL << d->arch.rma_order;
- ulong mfn;
- int t;
+ struct page_extents *pe;
if (pfn < rma_size_mfn) {
- mfn = pfn + rma_base_mfn;
- t = PFN_TYPE_RMA;
- } else if (pfn >= d->arch.logical_base_pfn &&
- pfn < d->arch.logical_end_pfn) {
- if (test_bit(_DOMF_privileged, &d->domain_flags)) {
- /* This hack allows dom0 to map all memory, necessary to
- * initialize domU state. */
- mfn = pfn;
- } else {
- panic("we do not handle the logical area yet\n");
- mfn = 0;
- }
-
- t = PFN_TYPE_LOGICAL;
- } else {
- /* don't know */
- mfn = pfn;
- t = PFN_TYPE_IO;
- }
-
- if (type != NULL)
- *type = t;
-
- return mfn;
+ if (type)
+ *type = PFN_TYPE_RMA;
+ return pfn + rma_base_mfn;
+ }
+
+ if (test_bit(_DOMF_privileged, &d->domain_flags) &&
+ mfn_in_hole(pfn)) {
+ if (type)
+ *type = PFN_TYPE_IO;
+ return pfn;
+ }
+
+ /* quick tests first */
+ list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
+ uint end_pfn = pe->pfn + (1 << pe->order);
+
+ if (pfn >= pe->pfn && pfn < end_pfn) {
+ if (type)
+ *type = PFN_TYPE_LOGICAL;
+ return page_to_mfn(pe->pg) + (pfn - pe->pfn);
+ }
+ }
+
+ /* This hack allows dom0 to map all memory, necessary to
+ * initialize domU state. */
+ if (test_bit(_DOMF_privileged, &d->domain_flags)) {
+ if (type)
+ *type = PFN_TYPE_REMOTE;
+ return pfn;
+ }
+
+ BUG();
+ return 0;
}
void guest_physmap_add_page(
struct domain *d, unsigned long gpfn, unsigned long mfn)
{
- panic("%s\n", __func__);
+ printk("%s(%d, 0x%lx, 0x%lx)\n", __func__, d->domain_id, gpfn, mfn);
}
void guest_physmap_remove_page(
struct domain *d, unsigned long gpfn, unsigned long mfn)
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/mpic.c
--- a/xen/arch/powerpc/mpic.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/mpic.c Fri Sep 01 13:04:02 2006 -0600
@@ -498,10 +498,10 @@ static void mpic_enable_irq(unsigned int
#ifdef CONFIG_MPIC_BROKEN_U3
if (mpic->flags & MPIC_BROKEN_U3) {
- unsigned int src = irq - mpic->irq_offset;
- if (mpic_is_ht_interrupt(mpic, src) &&
+ unsigned int bsrc = irq - mpic->irq_offset;
+ if (mpic_is_ht_interrupt(mpic, bsrc) &&
(irq_desc[irq].status & IRQ_LEVEL))
- mpic_ht_end_irq(mpic, src);
+ mpic_ht_end_irq(mpic, bsrc);
}
#endif /* CONFIG_MPIC_BROKEN_U3 */
}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/ofd_fixup.c
--- a/xen/arch/powerpc/ofd_fixup.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/ofd_fixup.c Fri Sep 01 13:04:02 2006 -0600
@@ -13,7 +13,7 @@
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
*
* Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
*/
@@ -24,6 +24,7 @@
#include <xen/version.h>
#include <public/xen.h>
#include "of-devtree.h"
+#include "oftree.h"
#undef RTAS
@@ -316,91 +317,6 @@ static ofdn_t ofd_rtas_props(void *m)
}
#endif
-struct mem_reg {
- u64 addr;
- u64 sz;
-};
-
-static ofdn_t ofd_memory_chunk_create(void *m, ofdn_t p,
- const char *ppath,
- const char *name,
- const char *dt,
- ulong start, ulong size)
-{
- struct mem_reg reg;
- char path[128];
- ulong l;
- u32 v;
- ofdn_t n;
- ulong nl = strlen(name) + 1;
- ulong dtl = strlen(dt) + 1;
-
- l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start);
- n = ofd_node_add(m, p, path, l + 1);
- ofd_prop_add(m, n, "name", name, nl);
-
- v = 1;
- ofd_prop_add(m, n, "#address-cells", &v, sizeof (v));
- v = 0;
- ofd_prop_add(m, n, "#size-cells", &v, sizeof (v));
-
- ofd_prop_add(m, n, "device_type", dt, dtl);
-
- /* physical addresses usable without regard to OF */
- reg.addr = start;
- reg.sz = size;
- ofd_prop_add(m, n, "reg", ®, sizeof (reg));
-
- return n;
-}
-
-static ofdn_t ofd_memory_props(void *m, struct domain *d, ulong eoload)
-{
- ofdn_t n = -1;
- ulong start = 0;
- static char name[] = "memory";
- ulong mem_size = rma_size(d->arch.rma_order);
- ulong chunk_size = rma_size(d->arch.rma_order);
-
- /* Remove all old memory props */
- do {
- ofdn_t old;
-
- old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type",
- name, sizeof(name));
- if (old <= 0) break;
-
- ofd_node_prune(m, old);
- } while (1);
-
- while (start < mem_size) {
- ulong size = (mem_size < chunk_size) ? mem_size : chunk_size;
-
- n = ofd_memory_chunk_create(m, OFD_ROOT, "", "memory", "memory",
- start, size);
-
- if (start == 0) {
- /* We are processing the first and RMA chunk */
-
- /* free list of physical addresses available after OF and
- * client program have been accounted for */
- struct mem_reg avail[] = {
- /* 0 til OF @ 32MiB - 16KiB stack */
- { .addr = 0, .sz = ((32 << 20) - (16 << 10)) },
- /* end of loaded material to the end the chunk - 1 page */
- { .addr = eoload, .sz = chunk_size - eoload - PAGE_SIZE },
- /* the last page is reserved for xen_start_info */
- };
- ofd_prop_add(m, n, "available", &avail,
- sizeof (avail));
- }
-
- start += size;
- mem_size -= size;
- }
- return n;
-}
-
static ofdn_t ofd_xen_props(void *m, struct domain *d, start_info_t *si)
{
ofdn_t n;
@@ -440,9 +356,8 @@ static ofdn_t ofd_xen_props(void *m, str
}
return n;
}
-extern int ofd_dom0_fixup(
- struct domain *d, ulong oftree, start_info_t *si, ulong dst);
-int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si, ulong eoload)
+
+int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si)
{
void *m;
const ofdn_t n = OFD_ROOT;
@@ -470,8 +385,8 @@ int ofd_dom0_fixup(struct domain *d, ulo
printk("Add /chosen props\n");
ofd_chosen_props(m, (char *)si->cmd_line);
- printk("fix /memory@0 props\n");
- ofd_memory_props(m, d, eoload);
+ printk("fix /memory props\n");
+ ofd_memory_props(m, d);
printk("fix /xen props\n");
ofd_xen_props(m, d, si);
@@ -497,8 +412,8 @@ int ofd_dom0_fixup(struct domain *d, ulo
r = ofd_prop_add(m, n, "ibm,partition-no", &did, sizeof(did));
ASSERT( r > 0 );
- const char dom0[] = "dom0";
- r = ofd_prop_add(m, n, "ibm,partition-name", dom0, sizeof (dom0));
+ const char d0[] = "dom0";
+ r = ofd_prop_add(m, n, "ibm,partition-name", d0, sizeof (d0));
ASSERT( r > 0 );
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/oftree.h
--- a/xen/arch/powerpc/oftree.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/oftree.h Fri Sep 01 13:04:02 2006 -0600
@@ -20,14 +20,18 @@
#ifndef _OFTREE_H
#define _OFTREE_H
+#include <xen/multiboot.h>
extern ulong oftree;
extern ulong oftree_len;
+extern ulong oftree_end;
-extern int ofd_dom0_fixup(
- struct domain *d, ulong oftree, start_info_t *si, ulong dst);
+extern int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si);
+extern void ofd_memory_props(void *m, struct domain *d);
extern int firmware_image_start[0];
extern int firmware_image_size[0];
+extern void memory_init(module_t *mod, int mcount);
+
#endif /* #ifndef _OFTREE_H */
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/papr/tce.c
--- a/xen/arch/powerpc/papr/tce.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/papr/tce.c Fri Sep 01 13:04:02 2006 -0600
@@ -47,7 +47,7 @@ static void h_put_tce(struct cpu_user_re
regs->gprs[3] = H_Success;
}
}
-
+
static void h_get_tce(struct cpu_user_regs *regs)
{
u32 liobn = regs->gprs[4];
@@ -57,7 +57,7 @@ static void h_get_tce(struct cpu_user_re
printk("%s: liobn: 0x%x ioba: 0x%lx \n", __func__, liobn, ioba);
#endif
regs->gprs[3] = H_Function;
- for(;;) ;
+ BUG();
}
static void h_stuff_tce(struct cpu_user_regs *regs)
@@ -76,7 +76,7 @@ static void h_stuff_tce(struct cpu_user_
count);
#endif
regs->gprs[3] = H_Function;
- for(;;);
+ BUG();
}
__init_papr_hcall(H_PUT_TCE, h_put_tce);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/papr/xlate.c
--- a/xen/arch/powerpc/papr/xlate.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/papr/xlate.c Fri Sep 01 13:04:02 2006 -0600
@@ -30,12 +30,6 @@
#include <asm/papr.h>
#include <asm/hcalls.h>
-static void not_yet(struct cpu_user_regs *regs)
-{
- printk("not implemented yet: 0x%lx\n", regs->gprs[3]);
- for (;;);
-}
-
#ifdef USE_PTE_INSERT
static inline void pte_insert(union pte volatile *pte,
ulong vsid, ulong rpn, ulong lrpn)
@@ -160,13 +154,13 @@ static void h_enter(struct cpu_user_regs
}
/* get correct pgshift value */
- pgshift = d->arch.large_page_shift[lp_size];
+ pgshift = d->arch.large_page_order[lp_size] + PAGE_SHIFT;
}
/* get the correct logical RPN in terms of 4K pages need to mask
* off lp bits and unused arpn bits if this is a large page */
- lpn = ~0ULL << (pgshift - 12);
+ lpn = ~0ULL << (pgshift - PAGE_SHIFT);
lpn = pte.bits.rpn & lpn;
rpn = pfn2mfn(d, lpn, &mtype);
@@ -493,8 +487,42 @@ static void h_remove(struct cpu_user_reg
pte_tlbie(&lpte, ptex);
}
+static void h_read(struct cpu_user_regs *regs)
+{
+ ulong flags = regs->gprs[4];
+ ulong ptex = regs->gprs[5];
+ struct vcpu *v = get_current();
+ struct domain *d = v->domain;
+ struct domain_htab *htab = &d->arch.htab;
+ union pte volatile *pte;
+
+ if (flags & H_READ_4)
+ ptex &= ~0x3UL;
+
+ if (ptex > (1UL << htab->log_num_ptes)) {
+ regs->gprs[3] = H_Parameter;
+ printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
+ return;
+ }
+ pte = &htab->map[ptex];
+ regs->gprs[4] = pte[0].words.vsid;
+ regs->gprs[5] = pte[0].words.rpn;
+
+ if (!(flags & H_READ_4)) {
+ /* dump another 3 PTEs */
+ regs->gprs[6] = pte[1].words.vsid;
+ regs->gprs[7] = pte[1].words.rpn;
+ regs->gprs[8] = pte[2].words.vsid;
+ regs->gprs[9] = pte[2].words.rpn;
+ regs->gprs[10] = pte[3].words.vsid;
+ regs->gprs[11] = pte[3].words.rpn;
+ }
+
+ regs->gprs[3] = H_Success;
+}
+
__init_papr_hcall(H_ENTER, h_enter);
-__init_papr_hcall(H_READ, not_yet);
+__init_papr_hcall(H_READ, h_read);
__init_papr_hcall(H_REMOVE, h_remove);
__init_papr_hcall(H_CLEAR_MOD, h_clear_mod);
__init_papr_hcall(H_CLEAR_REF, h_clear_ref);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/powerpc64/exceptions.S
--- a/xen/arch/powerpc/powerpc64/exceptions.S Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/powerpc64/exceptions.S Fri Sep 01 13:04:02 2006 -0600
@@ -514,6 +514,43 @@ _GLOBAL(sleep)
mtmsrd r3
blr
+/* The primary processor issues a firmware call to spin us up at this
+ * address, passing our CPU number in r3. We only need a function
+ * entry point instead of a descriptor since this is never called from
+ * C code.
+ */
.globl spin_start
spin_start:
+ /* Write our processor number as an acknowledgment that we're alive. */
+ LOADADDR(r14, __spin_ack)
+ stw r3, 0(r14)
+ sync
+ /* If NR_CPUS is too small, we should just spin forever. */
+ LOADADDR(r15, NR_CPUS)
+ cmpd r3, r15
+ blt 2f
b .
+ /* Find our index in the array of processor_area struct pointers. */
+2: LOADADDR(r14, global_cpu_table)
+ muli r15, r3, 8
+ add r14, r14, r15
+ /* Spin until the pointer for our processor goes valid. */
+1: ld r15, 0(r14)
+ cmpldi r15, 0
+ beq 1b
+ /* Dereference the pointer and load our stack pointer. */
+ isync
+ ld r1, PAREA_stack(r15)
+ li r14, STACK_FRAME_OVERHEAD
+ sub r1, r1, r14
+ /* Load up the TOC and entry point for the C function to be called. */
+ LOADADDR(r14, secondary_cpu_init)
+ ld r2, 8(r14)
+ ld r11, 0(r14)
+ mtctr r11
+ /* Warning: why do we need this synchronizing instruction on 970FX? */
+ isync
+ /* Jump into C code now. */
+ bctrl
+ nop
+ b .
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/powerpc64/ppc970.c
--- a/xen/arch/powerpc/powerpc64/ppc970.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/powerpc64/ppc970.c Fri Sep 01 13:04:02 2006 -0600
@@ -13,9 +13,10 @@
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
*
* Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ * Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
*/
#include <xen/config.h>
@@ -31,25 +32,68 @@
#undef SERIALIZE
-unsigned int cpu_rma_order(void)
+extern volatile struct processor_area * volatile global_cpu_table[];
+
+struct rma_settings {
+ int order;
+ int rmlr0;
+ int rmlr12;
+};
+
+static struct rma_settings rma_orders[] = {
+ { .order = 26, .rmlr0 = 0, .rmlr12 = 3, }, /* 64 MB */
+ { .order = 27, .rmlr0 = 1, .rmlr12 = 3, }, /* 128 MB */
+ { .order = 28, .rmlr0 = 1, .rmlr12 = 0, }, /* 256 MB */
+ { .order = 30, .rmlr0 = 0, .rmlr12 = 2, }, /* 1 GB */
+ { .order = 34, .rmlr0 = 0, .rmlr12 = 1, }, /* 16 GB */
+ { .order = 38, .rmlr0 = 0, .rmlr12 = 0, }, /* 256 GB */
+};
+
+static uint log_large_page_sizes[] = {
+ 4 + 20, /* (1 << 4) == 16M */
+};
+
+static struct rma_settings *cpu_find_rma(unsigned int order)
{
- /* XXX what about non-HV mode? */
- uint rma_log_size = 6 + 20; /* 64M */
- return rma_log_size - PAGE_SHIFT;
+ int i;
+ for (i = 0; i < ARRAY_SIZE(rma_orders); i++) {
+ if (rma_orders[i].order == order)
+ return &rma_orders[i];
+ }
+ return NULL;
}
-void cpu_initialize(void)
+unsigned int cpu_default_rma_order_pages(void)
{
- ulong stack;
+ return rma_orders[0].order - PAGE_SHIFT;
+}
- parea = xmalloc(struct processor_area);
+unsigned int cpu_large_page_orders(uint *sizes, uint max)
+{
+ uint i = 0;
+
+ while (i < max && i < ARRAY_SIZE(log_large_page_sizes)) {
+ sizes[i] = log_large_page_sizes[i] - PAGE_SHIFT;
+ ++i;
+ }
+
+ return i;
+}
+
+unsigned int cpu_extent_order(void)
+{
+ return log_large_page_sizes[0] - PAGE_SHIFT;
+}
+
+void cpu_initialize(int cpuid)
+{
+ ulong r1, r2;
+ __asm__ __volatile__ ("mr %0, 1" : "=r" (r1));
+ __asm__ __volatile__ ("mr %0, 2" : "=r" (r2));
+
+ /* This is SMP safe because the compiler must use r13 for it. */
+ parea = global_cpu_table[cpuid];
ASSERT(parea != NULL);
-
- stack = (ulong)alloc_xenheap_pages(STACK_ORDER);
-
- ASSERT(stack != 0);
- parea->hyp_stack_base = (void *)(stack + STACK_SIZE);
- printk("stack is here: %p\n", parea->hyp_stack_base);
mthsprg0((ulong)parea); /* now ready for exceptions */
@@ -79,7 +123,10 @@ void cpu_initialize(void)
s |= 1UL << (63-3); /* ser-gp */
hid0.word |= s;
#endif
- printk("hid0: 0x%016lx\n", hid0.word);
+
+ printk("CPU #%d: Hello World! SP = %lx TOC = %lx HID0 = %lx\n",
+ smp_processor_id(), r1, r2, hid0.word);
+
mthid0(hid0.word);
union hid1 hid1;
@@ -115,45 +162,22 @@ void cpu_init_vcpu(struct vcpu *v)
{
struct domain *d = v->domain;
union hid4 hid4;
- ulong rma_base = page_to_maddr(d->arch.rma_page);
- ulong rma_size = rma_size(d->arch.rma_order);
+ struct rma_settings *rma_settings;
hid4.word = mfhid4();
hid4.bits.lpes0 = 0; /* exceptions set MSR_HV=1 */
hid4.bits.lpes1 = 1; /* RMA applies */
- hid4.bits.rmor = rma_base >> 26;
+ hid4.bits.rmor = page_to_maddr(d->arch.rma_page) >> 26;
hid4.bits.lpid01 = d->domain_id & 3;
hid4.bits.lpid25 = (d->domain_id >> 2) & 0xf;
- switch (rma_size) {
- case 256ULL << 30: /* 256 GB */
- hid4.bits.rmlr0 = 0;
- hid4.bits.rmlr12 = 0;
- break;
- case 16ULL << 30: /* 16 GB */
- hid4.bits.rmlr0 = 0;
- hid4.bits.rmlr12 = 1;
- break;
- case 1ULL << 30: /* 1 GB */
- hid4.bits.rmlr0 = 0;
- hid4.bits.rmlr12 = 2;
- break;
- case 64ULL << 20: /* 64 MB */
- hid4.bits.rmlr0 = 0;
- hid4.bits.rmlr12 = 3;
- break;
- case 256ULL << 20: /* 256 MB */
- hid4.bits.rmlr0 = 1;
- hid4.bits.rmlr12 = 0;
- break;
- case 128ULL << 20: /* 128 MB */
- hid4.bits.rmlr0 = 1;
- hid4.bits.rmlr12 = 3;
- break;
- }
+ rma_settings = cpu_find_rma(d->arch.rma_order + PAGE_SHIFT);
+ ASSERT(rma_settings != NULL);
+ hid4.bits.rmlr0 = rma_settings->rmlr0;
+ hid4.bits.rmlr12 = rma_settings->rmlr12;
v->arch.cpu.hid4.word = hid4.word;
}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/setup.c
--- a/xen/arch/powerpc/setup.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/setup.c Fri Sep 01 13:04:02 2006 -0600
@@ -43,9 +43,9 @@
#include <asm/percpu.h>
#include "exceptions.h"
#include "of-devtree.h"
+#include "oftree.h"
#define DEBUG
-unsigned long xenheap_phys_end;
/* opt_noht: If true, Hyperthreading is ignored. */
int opt_noht = 0;
@@ -53,6 +53,14 @@ boolean_param("noht", opt_noht);
int opt_earlygdb = 0;
boolean_param("earlygdb", opt_earlygdb);
+
+/* opt_nosmp: If true, secondary processors are ignored. */
+static int opt_nosmp = 0;
+boolean_param("nosmp", opt_nosmp);
+
+/* maxcpus: maximum number of CPUs to activate. */
+static unsigned int max_cpus = NR_CPUS;
+integer_param("maxcpus", max_cpus);
u32 tlbflush_clock = 1U;
DEFINE_PER_CPU(u32, tlbflush_time);
@@ -61,9 +69,12 @@ unsigned long wait_init_idle;
unsigned long wait_init_idle;
ulong oftree;
ulong oftree_len;
+ulong oftree_end;
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
cpumask_t cpu_online_map; /* missing ifdef in schedule.c */
+cpumask_t cpu_present_map;
+cpumask_t cpu_possible_map;
/* XXX get this from ISA node in device tree */
ulong isa_io_base;
@@ -74,6 +85,8 @@ extern void idle_loop(void);
/* move us to a header file */
extern void initialize_keytable(void);
+
+volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
int is_kernel_text(unsigned long addr)
{
@@ -169,6 +182,21 @@ static void __init start_of_day(void)
percpu_free_unused_areas();
+ {
+ /* FIXME: Xen assumes that an online CPU is a schedulable
+ * CPU, but we just are not there yet. Remove this fragment when
+ * scheduling processors actually works. */
+ int cpuid;
+
+ printk("WARNING!: Taking all secondary CPUs offline\n");
+
+ for_each_online_cpu(cpuid) {
+ if (cpuid == 0)
+ continue;
+ cpu_clear(cpuid, cpu_online_map);
+ }
+ }
+
initialize_keytable();
/* Register another key that will allow for the the Harware Probe
* to be contacted, this works with RiscWatch probes and should
@@ -193,17 +221,60 @@ void startup_cpu_idle_loop(void)
reset_stack_and_jump(idle_loop);
}
+static void init_parea(int cpuid)
+{
+ /* Be careful not to shadow the global variable. */
+ volatile struct processor_area *pa;
+ void *stack;
+
+ pa = xmalloc(struct processor_area);
+ if (pa == NULL)
+ panic("%s: failed to allocate parea for cpu #%d\n", __func__, cpuid);
+
+ stack = alloc_xenheap_pages(STACK_ORDER);
+ if (stack == NULL)
+ panic("%s: failed to allocate stack (order %d) for cpu #%d\n",
+ __func__, STACK_ORDER, cpuid);
+
+ pa->whoami = cpuid;
+ pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE);
+
+ /* This store has the effect of invoking secondary_cpu_init. */
+ global_cpu_table[cpuid] = pa;
+ mb();
+}
+
+static int kick_secondary_cpus(int maxcpus)
+{
+ int cpuid;
+
+ for_each_present_cpu(cpuid) {
+ if (cpuid == 0)
+ continue;
+ if (cpuid >= maxcpus)
+ break;
+ init_parea(cpuid);
+ cpu_set(cpuid, cpu_online_map);
+ cpu_set(cpuid, cpu_possible_map);
+ }
+
+ return 0;
+}
+
+/* This is the first C code that secondary processors invoke. */
+int secondary_cpu_init(int cpuid, unsigned long r4);
+int secondary_cpu_init(int cpuid, unsigned long r4)
+{
+ cpu_initialize(cpuid);
+ while(1);
+}
+
static void __init __start_xen(multiboot_info_t *mbi)
{
char *cmdline;
module_t *mod = (module_t *)((ulong)mbi->mods_addr);
- ulong heap_start;
- ulong modules_start, modules_size;
- ulong eomem = 0;
- ulong heap_size = 0;
- ulong bytes = 0;
- ulong freemem = (ulong)_end;
- ulong oftree_end;
+ ulong dom0_start, dom0_len;
+ ulong initrd_start, initrd_len;
memcpy(0, exception_vectors, exception_vectors_end - exception_vectors);
synchronize_caches(0, exception_vectors_end - exception_vectors);
@@ -226,6 +297,9 @@ static void __init __start_xen(multiboot
console_start_sync();
#endif
+ /* we give the first RMA to the hypervisor */
+ xenheap_phys_end = rma_size(cpu_default_rma_order_pages());
+
/* Check that we have at least one Multiboot module. */
if (!(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0)) {
panic("FATAL ERROR: Require at least one Multiboot module.\n");
@@ -234,10 +308,6 @@ static void __init __start_xen(multiboot
if (!(mbi->flags & MBI_MEMMAP)) {
panic("FATAL ERROR: Bootloader provided no memory information.\n");
}
-
- /* mark the begining of images */
- modules_start = mod[0].mod_start;
- modules_size = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
/* OF dev tree is the last module */
oftree = mod[mbi->mods_count-1].mod_start;
@@ -249,71 +319,7 @@ static void __init __start_xen(multiboot
mod[mbi->mods_count-1].mod_end = 0;
--mbi->mods_count;
- printk("Physical RAM map:\n");
-
- /* lets find out how much memory there is */
- while (bytes < mbi->mmap_length) {
- u64 end;
- u64 addr;
- u64 size;
-
- memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr + bytes);
- addr = ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
- size = ((u64)map->length_high << 32) | (u64)map->length_low;
- end = addr + size;
-
- printk(" %016lx - %016lx (usable)\n", addr, end);
-
- if (addr > eomem) {
- printk("found a hole skipping remainder of memory at:\n"
- " %016lx and beyond\n", addr);
- break;
- }
- if (end > eomem) {
- eomem = end;
- }
- bytes += map->size + 4;
- }
-
- printk("System RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);
-
- /* top of memory */
- max_page = PFN_DOWN(ALIGN_DOWN(eomem, PAGE_SIZE));
- total_pages = max_page;
-
- /* Architecturally the first 4 pages are exception hendlers, we
- * will also be copying down some code there */
- heap_start = init_boot_allocator(4 << PAGE_SHIFT);
-
- /* we give the first RMA to the hypervisor */
- xenheap_phys_end = rma_size(cpu_rma_order());
-
- /* allow everything else to be allocated */
- init_boot_pages(xenheap_phys_end, eomem);
- init_frametable();
- end_boot_allocator();
-
- /* Add memory between the beginning of the heap and the beginning
- * of out text */
- init_xenheap_pages(heap_start, (ulong)_start);
-
- /* move the modules to just after _end */
- if (modules_start) {
- printk("modules at: %016lx - %016lx\n", modules_start,
- modules_start + modules_size);
- freemem = ALIGN_UP(freemem, PAGE_SIZE);
- memmove((void *)freemem, (void *)modules_start, modules_size);
-
- oftree -= modules_start - freemem;
- modules_start = freemem;
- freemem += modules_size;
- printk(" moved to: %016lx - %016lx\n", modules_start,
- modules_start + modules_size);
- }
-
- /* the rest of the xenheap, starting at the end of modules */
- init_xenheap_pages(freemem, xenheap_phys_end);
-
+ memory_init(mod, mbi->mods_count);
#ifdef OF_DEBUG
printk("ofdump:\n");
@@ -321,13 +327,10 @@ static void __init __start_xen(multiboot
ofd_walk((void *)oftree, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
#endif
- heap_size = xenheap_phys_end - heap_start;
-
- printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);
-
percpu_init_areas();
- cpu_initialize();
+ init_parea(0);
+ cpu_initialize(0);
#ifdef CONFIG_GDB
initialise_gdb();
@@ -335,6 +338,14 @@ static void __init __start_xen(multiboot
debugger_trap_immediate();
#endif
+ /* Deal with secondary processors. */
+ if (opt_nosmp) {
+ printk("nosmp: leaving secondary processors spinning forever\n");
+ } else {
+ printk("spinning up at most %d total processors ...\n", max_cpus);
+ kick_secondary_cpus(max_cpus);
+ }
+
start_of_day();
/* Create initial domain 0. */
@@ -353,22 +364,26 @@ static void __init __start_xen(multiboot
/* Scrub RAM that is still free and so may go to an unprivileged domain. */
scrub_heap_pages();
- /*
- * We're going to setup domain0 using the module(s) that we
- * stashed safely above our heap. The second module, if present,
- * is an initrd ramdisk. The last module is the OF devtree.
- */
- if (construct_dom0(dom0,
- modules_start,
- mod[0].mod_end-mod[0].mod_start,
- (mbi->mods_count == 1) ? 0 :
- modules_start +
- (mod[1].mod_start-mod[0].mod_start),
- (mbi->mods_count == 1) ? 0 :
- mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
+ dom0_start = mod[0].mod_start;
+ dom0_len = mod[0].mod_end - mod[0].mod_start;
+ if (mbi->mods_count > 1) {
+ initrd_start = mod[1].mod_start;
+ initrd_len = mod[1].mod_end - mod[1].mod_start;
+ } else {
+ initrd_start = 0;
+ initrd_len = 0;
+ }
+ if (construct_dom0(dom0, dom0_start, dom0_len,
+ initrd_start, initrd_len,
cmdline) != 0) {
panic("Could not set up DOM0 guest OS\n");
}
+
+ init_xenheap_pages(ALIGN_UP(dom0_start, PAGE_SIZE),
+ ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE));
+ if (initrd_start)
+ init_xenheap_pages(ALIGN_UP(initrd_start, PAGE_SIZE),
+ ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE));
init_trace_bufs();
@@ -407,6 +422,8 @@ void arch_get_xen_caps(xen_capabilities_
void arch_get_xen_caps(xen_capabilities_info_t info)
{
}
+
+
/*
* Local variables:
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/xen.lds.S
--- a/xen/arch/powerpc/xen.lds.S Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/xen.lds.S Fri Sep 01 13:04:02 2006 -0600
@@ -10,11 +10,15 @@ SEARCH_DIR("=/usr/local/lib64"); SEARCH_
SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64");
SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib");
SEARCH_DIR("=/usr/lib");
/* Do we need any of these for elf?
__DYNAMIC = 0; */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS;
+}
SECTIONS
{
/* Read-only sections, merged into text segment: */
PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + SIZEOF_HEADERS;
- .interp : { *(.interp) }
+ .interp : { *(.interp) } :text
.hash : { *(.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
@@ -103,7 +107,7 @@ SECTIONS
PROVIDE (__fini_array_end = .);
.data :
{
- *(.data .data.* .gnu.linkonce.d.*)
+ *(.data .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}
@@ -121,7 +125,7 @@ SECTIONS
__inithcall_end = .;
__per_cpu_start = .;
- .data.percpu : { *(.data.percpu) } :text
+ .data.percpu : { *(.data.percpu) }
__per_cpu_data_end = .;
. = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
. = ALIGN(STACK_SIZE);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/io.c Fri Sep 01 13:04:02 2006 -0600
@@ -646,9 +646,13 @@ static void hvm_mmio_assist(struct cpu_u
break;
case INSTR_BT:
- index = operand_index(src);
- value = get_reg_value(size, index, 0, regs);
-
+ if ( src & REGISTER )
+ {
+ index = operand_index(src);
+ value = get_reg_value(size, index, 0, regs);
+ }
+ else if ( src & IMMEDIATE )
+ value = mmio_opp->immediate;
if (p->u.data & (1 << (value & ((1 << 5) - 1))))
regs->eflags |= X86_EFLAGS_CF;
else
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/platform.c Fri Sep 01 13:04:02 2006 -0600
@@ -652,6 +652,23 @@ static int hvm_decode(int realmode, unsi
instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
return DECODE_success;
+ case 0xBA:
+ if (((opcode[1] >> 3) & 7) == 4) /* BT $imm8, m16/32/64 */
+ {
+ instr->instr = INSTR_BT;
+ GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+ instr->immediate =
+ (signed char)get_immediate(realmode, opcode+1, BYTE);
+ instr->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
+ instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+ return DECODE_success;
+ }
+ else
+ {
+ printf("0f %x, This opcode subtype isn't handled yet\n", *opcode);
+ return DECODE_failure;
+ }
+
default:
printf("0f %x, This opcode isn't handled yet\n", *opcode);
return DECODE_failure;
@@ -1002,10 +1019,17 @@ void handle_mmio(unsigned long va, unsig
mmio_opp->operand[0] = mmio_inst.operand[0]; /* bit offset */
mmio_opp->operand[1] = mmio_inst.operand[1]; /* bit base */
- index = operand_index(mmio_inst.operand[0]);
- size = operand_size(mmio_inst.operand[0]);
- value = get_reg_value(size, index, 0, regs);
-
+ if ( mmio_inst.operand[0] & REGISTER )
+ {
+ index = operand_index(mmio_inst.operand[0]);
+ size = operand_size(mmio_inst.operand[0]);
+ value = get_reg_value(size, index, 0, regs);
+ }
+ else if ( mmio_inst.operand[0] & IMMEDIATE )
+ {
+ mmio_opp->immediate = mmio_inst.immediate;
+ value = mmio_inst.immediate;
+ }
send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1,
mmio_inst.op_size, 0, IOREQ_READ, 0);
break;
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c Fri Sep 01 13:04:02 2006 -0600
@@ -79,22 +79,22 @@ asmlinkage void svm_intr_assist(void)
ASSERT(vmcb);
/* Check if an Injection is active */
- /* Previous Interrupt delivery caused this Intercept? */
- if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0))
{
- v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector;
+ /* Previous Interrupt delivery caused this Intercept? */
+ if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) {
+ v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector;
// printk("Injecting PF#: saving IRQ from ExitInfo\n");
- vmcb->exitintinfo.bytes = 0;
- re_injecting = 1;
- }
+ vmcb->exitintinfo.bytes = 0;
+ re_injecting = 1;
+ }
/* Guest's interrputs masked? */
rflags = vmcb->rflags;
if (irq_masked(rflags)) {
HVM_DBG_LOG(DBG_LEVEL_1, "Guest IRQs masked: rflags: %lx", rflags);
- /* bail out, we won't be injecting an interrupt this time */
- return;
+ /* bail out, we won't be injecting an interrupt this time */
+ return;
}
-
+
/* Previous interrupt still pending? */
if (vmcb->vintr.fields.irq) {
// printk("Re-injecting IRQ from Vintr\n");
@@ -115,27 +115,24 @@ asmlinkage void svm_intr_assist(void)
if ( v->vcpu_id == 0 )
hvm_pic_assist(v);
+
+ if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+ pic_set_irq(pic, pt->irq, 0);
+ pic_set_irq(pic, pt->irq, 1);
+ }
+
callback_irq = v->domain->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ];
-
- /* Before we deal with PIT interrupts, let's check for
- interrupts set by the device model or paravirtualised event
- channel interrupts.
- */
- if ( cpu_has_pending_irq(v) ) {
- intr_vector = cpu_get_interrupt(v, &intr_type);
- }
- else if ( callback_irq != 0 && local_events_need_delivery() ) {
+ if ( callback_irq != 0 &&
+ local_events_need_delivery() ) {
/*inject para-device call back irq*/
v->vcpu_info->evtchn_upcall_mask = 1;
pic_set_irq(pic, callback_irq, 0);
pic_set_irq(pic, callback_irq, 1);
- intr_vector = callback_irq;
}
- else if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
- pic_set_irq(pic, pt->irq, 0);
- pic_set_irq(pic, pt->irq, 1);
+
+ if ( cpu_has_pending_irq(v) )
intr_vector = cpu_get_interrupt(v, &intr_type);
- }
+
}
/* have we got an interrupt to inject? */
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c Fri Sep 01 13:04:02 2006 -0600
@@ -243,6 +243,7 @@ static void svm_store_cpu_guest_regs(
{
/* Returning the guest's regs */
crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
+ crs[2] = v->arch.hvm_svm.cpu_cr2;
crs[3] = v->arch.hvm_svm.cpu_cr3;
crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
}
@@ -2793,9 +2794,7 @@ asmlinkage void svm_vmexit_handler(struc
break;
case VMEXIT_INTR:
- raise_softirq(SCHEDULE_SOFTIRQ);
- break;
-
+ break;
case VMEXIT_INVD:
svm_vmexit_do_invd(vmcb);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/vlapic.c Fri Sep 01 13:04:02 2006 -0600
@@ -919,6 +919,20 @@ int cpu_has_apic_interrupt(struct vcpu*
return 0;
}
+/* check to see if there is pending interrupt */
+int cpu_has_pending_irq(struct vcpu *v)
+{
+ struct hvm_domain *plat = &v->domain->arch.hvm_domain;
+
+ /* APIC */
+ if ( cpu_has_apic_interrupt(v) ) return 1;
+
+ /* PIC */
+ if ( !vlapic_accept_pic_intr(v) ) return 0;
+
+ return plat->interrupt_request;
+}
+
void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode)
{
struct vlapic *vlapic = VLAPIC(v);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/io.c Fri Sep 01 13:04:02 2006 -0600
@@ -68,19 +68,6 @@ static inline int is_interruptibility_st
return interruptibility;
}
-/* check to see if there is pending interrupt */
-int cpu_has_pending_irq(struct vcpu *v)
-{
- struct hvm_domain *plat = &v->domain->arch.hvm_domain;
-
- /* APIC */
- if ( cpu_has_apic_interrupt(v) ) return 1;
-
- /* PIC */
- if ( !vlapic_accept_pic_intr(v) ) return 0;
-
- return plat->interrupt_request;
-}
asmlinkage void vmx_intr_assist(void)
{
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Sep 01 13:04:02 2006 -0600
@@ -46,6 +46,8 @@
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
+extern uint32_t vlapic_update_ppr(struct vlapic *vlapic);
+
static DEFINE_PER_CPU(unsigned long, trace_values[5]);
#define TRACE_VMEXIT(index,value) this_cpu(trace_values)[index]=value
@@ -518,6 +520,7 @@ static void vmx_store_cpu_guest_regs(
if ( crs != NULL )
{
__vmread(CR0_READ_SHADOW, &crs[0]);
+ crs[2] = v->arch.hvm_vmx.cpu_cr2;
__vmread(GUEST_CR3, &crs[3]);
__vmread(CR4_READ_SHADOW, &crs[4]);
}
@@ -953,8 +956,6 @@ static void vmx_vmexit_do_cpuid(struct c
bitmaskof(X86_FEATURE_MWAIT) );
edx &= ~( bitmaskof(X86_FEATURE_HT) |
- bitmaskof(X86_FEATURE_MCA) |
- bitmaskof(X86_FEATURE_MCE) |
bitmaskof(X86_FEATURE_ACPI) |
bitmaskof(X86_FEATURE_ACC) );
}
@@ -1615,6 +1616,7 @@ static int mov_to_cr(int gp, int cr, str
unsigned long value;
unsigned long old_cr;
struct vcpu *v = current;
+ struct vlapic *vlapic = VLAPIC(v);
switch ( gp ) {
CASE_GET_REG(EAX, eax);
@@ -1758,6 +1760,12 @@ static int mov_to_cr(int gp, int cr, str
shadow_update_paging_modes(v);
break;
}
+ case 8:
+ {
+ vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
+ vlapic_update_ppr(vlapic);
+ break;
+ }
default:
printk("invalid cr: %d\n", gp);
__hvm_bug(regs);
@@ -1771,13 +1779,20 @@ static int mov_to_cr(int gp, int cr, str
*/
static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
{
- unsigned long value;
+ unsigned long value = 0;
struct vcpu *v = current;
-
- if ( cr != 3 )
+ struct vlapic *vlapic = VLAPIC(v);
+
+ if ( cr != 3 && cr != 8)
__hvm_bug(regs);
- value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
+ if ( cr == 3 )
+ value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
+ else if ( cr == 8 )
+ {
+ value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
+ value = (value & 0xF0) >> 4;
+ }
switch ( gp ) {
CASE_SET_REG(EAX, eax);
@@ -1888,7 +1903,7 @@ static inline void vmx_do_msr_read(struc
}
rdmsr_safe(regs->ecx, regs->eax, regs->edx);
- break;
+ return;
}
regs->eax = msr_content & 0xFFFFFFFF;
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c Fri Sep 01 13:04:02 2006 -0600
@@ -2861,11 +2861,11 @@ static int sh_page_fault(struct vcpu *v,
// bunch of 4K maps.
//
+ shadow_lock(d);
+
SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
v->domain->domain_id, v->vcpu_id, va, regs->error_code);
- shadow_lock(d);
-
shadow_audit_tables(v);
if ( guest_walk_tables(v, va, &gw, 1) != 0 )
@@ -3291,12 +3291,6 @@ sh_update_linear_entries(struct vcpu *v)
{
ml3e = __linear_l3_table;
l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
-#if GUEST_PAGING_LEVELS == 2
- /* Shadow l3 tables are made up by update_cr3 */
- sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-#else
- sl3e = v->arch.shadow_vtable;
-#endif
}
else
{
@@ -3306,13 +3300,15 @@ sh_update_linear_entries(struct vcpu *v)
l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
ml3e = sh_map_domain_page(l3mfn);
sh_unmap_domain_page(ml4e);
+ }
+
#if GUEST_PAGING_LEVELS == 2
- /* Shadow l3 tables are made up by update_cr3 */
- sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+ /* Shadow l3 tables are made up by update_cr3 */
+ sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
#else
- sl3e = sh_map_domain_page(pagetable_get_mfn(v->arch.shadow_table));
-#endif
- }
+ /* Always safe to use shadow_vtable, because it's globally mapped */
+ sl3e = v->arch.shadow_vtable;
+#endif
for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
{
@@ -3324,12 +3320,7 @@ sh_update_linear_entries(struct vcpu *v)
}
if ( v != current )
- {
sh_unmap_domain_page(ml3e);
-#if GUEST_PAGING_LEVELS != 2
- sh_unmap_domain_page(sl3e);
-#endif
- }
}
#elif CONFIG_PAGING_LEVELS == 3
@@ -3361,31 +3352,10 @@ sh_update_linear_entries(struct vcpu *v)
#else /* GUEST_PAGING_LEVELS == 3 */
- /* Use local vcpu's mappings if we can; otherwise make new mappings */
- if ( v == current )
- {
- shadow_l3e = v->arch.shadow_vtable;
- if ( !shadow_mode_external(d) )
- guest_l3e = v->arch.guest_vtable;
- }
- else
- {
- mfn_t smfn;
- int idx;
-
- /* Map the shadow l3 */
- smfn = pagetable_get_mfn(v->arch.shadow_table);
- idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable));
- shadow_l3e = sh_map_domain_page(smfn);
- shadow_l3e += idx;
- if ( !shadow_mode_external(d) )
- {
- /* Also the guest l3 */
- mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table);
- guest_l3e = sh_map_domain_page(gmfn);
- guest_l3e += guest_index(v->arch.guest_vtable);
- }
- }
+ /* Always safe to use *_vtable, because they're globally mapped */
+ shadow_l3e = v->arch.shadow_vtable;
+ guest_l3e = v->arch.guest_vtable;
+
#endif /* GUEST_PAGING_LEVELS */
/* Choose where to write the entries, using linear maps if possible */
@@ -3443,14 +3413,6 @@ sh_update_linear_entries(struct vcpu *v)
if ( v != current || !shadow_mode_external(d) )
sh_unmap_domain_page(l2e);
-#if GUEST_PAGING_LEVELS == 3
- if ( v != current)
- {
- sh_unmap_domain_page(shadow_l3e);
- if ( !shadow_mode_external(d) )
- sh_unmap_domain_page(guest_l3e);
- }
-#endif
}
#elif CONFIG_PAGING_LEVELS == 2
@@ -3601,7 +3563,7 @@ sh_detach_old_tables(struct vcpu *v)
v->arch.shadow_vtable )
{
// Q: why does this need to use (un)map_domain_page_*global* ?
- //
+ /* A: so sh_update_linear_entries can operate on other vcpus */
sh_unmap_domain_page_global(v->arch.shadow_vtable);
v->arch.shadow_vtable = NULL;
}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/physdev.c Fri Sep 01 13:04:02 2006 -0600
@@ -96,10 +96,11 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
if ( !IS_PRIV(current->domain) )
break;
+ irq = irq_op.irq;
ret = -EINVAL;
- if ( (irq = irq_op.irq) >= NR_IRQS )
+ if ( (irq < 0) || (irq >= NR_IRQS) )
break;
-
+
irq_op.vector = assign_irq_vector(irq);
ret = copy_to_guest(arg, &irq_op, 1) ? -EFAULT : 0;
break;
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/time.c Fri Sep 01 13:04:02 2006 -0600
@@ -676,7 +676,7 @@ static inline void __update_vcpu_system_
struct vcpu_time_info *u;
t = &this_cpu(cpu_time);
- u = &v->domain->shared_info->vcpu_info[v->vcpu_id].time;
+ u = &v->vcpu_info->time;
version_update_begin(&u->version);
@@ -690,7 +690,7 @@ static inline void __update_vcpu_system_
void update_vcpu_system_time(struct vcpu *v)
{
- if ( v->domain->shared_info->vcpu_info[v->vcpu_id].time.tsc_timestamp !=
+ if ( v->vcpu_info->time.tsc_timestamp !=
this_cpu(cpu_time).local_tsc_stamp )
__update_vcpu_system_time(v);
}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/traps.c Fri Sep 01 13:04:02 2006 -0600
@@ -339,7 +339,6 @@ asmlinkage void fatal_trap(int trapnr, s
asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
{
int cpu = smp_processor_id();
- unsigned long cr2;
static char *trapstr[] = {
"divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
"invalid opcode", "device not available", "double fault",
@@ -356,7 +355,7 @@ asmlinkage void fatal_trap(int trapnr, s
if ( trapnr == TRAP_page_fault )
{
- __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
+ unsigned long cr2 = read_cr2();
printk("Faulting linear address: %p\n", _p(cr2));
show_page_walk(cr2);
}
@@ -911,7 +910,7 @@ asmlinkage int do_page_fault(struct cpu_
ASSERT(!in_irq());
- __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
+ addr = read_cr2();
DEBUGGER_trap_entry(TRAP_page_fault, regs);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/x86_32/traps.c Fri Sep 01 13:04:02 2006 -0600
@@ -21,11 +21,28 @@
/* All CPUs have their own IDT to allow int80 direct trap. */
idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
+static void print_xen_info(void)
+{
+ char taint_str[TAINT_STRING_MAX_LEN];
+ char debug = 'n', *arch = "x86_32";
+
+#ifndef NDEBUG
+ debug = 'y';
+#endif
+
+#ifdef CONFIG_X86_PAE
+ arch = "x86_32p";
+#endif
+
+ printk("----[ Xen-%d.%d%s %s debug=%c %s ]----\n",
+ xen_major_version(), xen_minor_version(), xen_extra_version(),
+ arch, debug, print_tainted(taint_str));
+}
+
void show_registers(struct cpu_user_regs *regs)
{
struct cpu_user_regs fault_regs = *regs;
unsigned long fault_crs[8];
- char taint_str[TAINT_STRING_MAX_LEN];
const char *context;
if ( hvm_guest(current) && guest_mode(regs) )
@@ -35,25 +52,29 @@ void show_registers(struct cpu_user_regs
}
else
{
- context = guest_mode(regs) ? "guest" : "hypervisor";
-
if ( !guest_mode(regs) )
{
+ context = "hypervisor";
fault_regs.esp = (unsigned long)®s->esp;
fault_regs.ss = read_segment_register(ss);
fault_regs.ds = read_segment_register(ds);
fault_regs.es = read_segment_register(es);
fault_regs.fs = read_segment_register(fs);
fault_regs.gs = read_segment_register(gs);
+ fault_crs[2] = read_cr2();
+ }
+ else
+ {
+ context = "guest";
+ fault_crs[2] = current->vcpu_info->arch.cr2;
}
fault_crs[0] = read_cr0();
fault_crs[3] = read_cr3();
- }
-
- printk("----[ Xen-%d.%d%s %s ]----\n",
- xen_major_version(), xen_minor_version(), xen_extra_version(),
- print_tainted(taint_str));
+ fault_crs[4] = read_cr4();
+ }
+
+ print_xen_info();
printk("CPU: %d\nEIP: %04x:[<%08x>]",
smp_processor_id(), fault_regs.cs, fault_regs.eip);
if ( !guest_mode(regs) )
@@ -63,7 +84,8 @@ void show_registers(struct cpu_user_regs
fault_regs.eax, fault_regs.ebx, fault_regs.ecx, fault_regs.edx);
printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
fault_regs.esi, fault_regs.edi, fault_regs.ebp, fault_regs.esp);
- printk("cr0: %08lx cr3: %08lx\n", fault_crs[0], fault_crs[3]);
+ printk("cr0: %08lx cr4: %08lx cr3: %08lx cr2: %08lx\n",
+ fault_crs[0], fault_crs[4], fault_crs[3], fault_crs[2]);
printk("ds: %04x es: %04x fs: %04x gs: %04x "
"ss: %04x cs: %04x\n",
fault_regs.ds, fault_regs.es, fault_regs.fs,
@@ -125,7 +147,6 @@ asmlinkage void do_double_fault(void)
{
struct tss_struct *tss = &doublefault_tss;
unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
- char taint_str[TAINT_STRING_MAX_LEN];
watchdog_disable();
@@ -133,9 +154,8 @@ asmlinkage void do_double_fault(void)
/* Find information saved during fault and dump it to the console. */
tss = &init_tss[cpu];
- printk("*** DOUBLE FAULT: Xen-%d.%d%s %s\n",
- xen_major_version(), xen_minor_version(), xen_extra_version(),
- print_tainted(taint_str));
+ printk("*** DOUBLE FAULT ***\n");
+ print_xen_info();
printk("CPU: %d\nEIP: %04x:[<%08x>]",
cpu, tss->cs, tss->eip);
print_symbol(" %s\n", tss->eip);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/x86_64/traps.c Fri Sep 01 13:04:02 2006 -0600
@@ -21,11 +21,24 @@
#include <public/callback.h>
+static void print_xen_info(void)
+{
+ char taint_str[TAINT_STRING_MAX_LEN];
+ char debug = 'n';
+
+#ifndef NDEBUG
+ debug = 'y';
+#endif
+
+ printk("----[ Xen-%d.%d%s x86_64 debug=%c %s ]----\n",
+ xen_major_version(), xen_minor_version(), xen_extra_version(),
+ debug, print_tainted(taint_str));
+}
+
void show_registers(struct cpu_user_regs *regs)
{
struct cpu_user_regs fault_regs = *regs;
unsigned long fault_crs[8];
- char taint_str[TAINT_STRING_MAX_LEN];
const char *context;
if ( hvm_guest(current) && guest_mode(regs) )
@@ -35,18 +48,27 @@ void show_registers(struct cpu_user_regs
}
else
{
- context = guest_mode(regs) ? "guest" : "hypervisor";
+ if ( guest_mode(regs) )
+ {
+ context = "guest";
+ fault_crs[2] = current->vcpu_info->arch.cr2;
+ }
+ else
+ {
+ context = "hypervisor";
+ fault_crs[2] = read_cr2();
+ }
+
fault_crs[0] = read_cr0();
fault_crs[3] = read_cr3();
+ fault_crs[4] = read_cr4();
fault_regs.ds = read_segment_register(ds);
fault_regs.es = read_segment_register(es);
fault_regs.fs = read_segment_register(fs);
fault_regs.gs = read_segment_register(gs);
}
- printk("----[ Xen-%d.%d%s %s ]----\n",
- xen_major_version(), xen_minor_version(), xen_extra_version(),
- print_tainted(taint_str));
+ print_xen_info();
printk("CPU: %d\nRIP: %04x:[<%016lx>]",
smp_processor_id(), fault_regs.cs, fault_regs.rip);
if ( !guest_mode(regs) )
@@ -62,8 +84,9 @@ void show_registers(struct cpu_user_regs
fault_regs.r9, fault_regs.r10, fault_regs.r11);
printk("r12: %016lx r13: %016lx r14: %016lx\n",
fault_regs.r12, fault_regs.r13, fault_regs.r14);
- printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
- fault_regs.r15, fault_crs[0], fault_crs[3]);
+ printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
+ fault_regs.r15, fault_crs[0], fault_crs[4]);
+ printk("cr3: %016lx cr2: %016lx\n", fault_crs[3], fault_crs[2]);
printk("ds: %04x es: %04x fs: %04x gs: %04x "
"ss: %04x cs: %04x\n",
fault_regs.ds, fault_regs.es, fault_regs.fs,
@@ -121,7 +144,6 @@ asmlinkage void do_double_fault(struct c
asmlinkage void do_double_fault(struct cpu_user_regs *regs)
{
unsigned int cpu, tr;
- char taint_str[TAINT_STRING_MAX_LEN];
asm ( "str %0" : "=r" (tr) );
cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;
@@ -131,9 +153,8 @@ asmlinkage void do_double_fault(struct c
console_force_unlock();
/* Find information saved during fault and dump it to the console. */
- printk("*** DOUBLE FAULT: Xen-%d.%d%s %s\n",
- xen_major_version(), xen_minor_version(), xen_extra_version(),
- print_tainted(taint_str));
+ printk("*** DOUBLE FAULT ***\n");
+ print_xen_info();
printk("CPU: %d\nRIP: %04x:[<%016lx>]",
cpu, regs->cs, regs->rip);
print_symbol(" %s", regs->rip);
diff -r 4ba098226429 -r 1bab7d65171b xen/common/perfc.c
--- a/xen/common/perfc.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/common/perfc.c Fri Sep 01 13:04:02 2006 -0600
@@ -136,8 +136,8 @@ static xen_sysctl_perfc_val_t *perfc_val
static xen_sysctl_perfc_val_t *perfc_vals;
static int perfc_nbr_vals;
static int perfc_init = 0;
-static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc,
- XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val)
+static int perfc_copy_info(XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc,
+ XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val)
{
unsigned int i, j;
unsigned int v = 0;
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-ia64/mm.h Fri Sep 01 13:04:02 2006 -0600
@@ -451,7 +451,6 @@ extern u64 translate_domain_pte(u64 ptev
#define INVALID_M2P_ENTRY (~0UL)
#define VALID_M2P(_e) (!((_e) & (1UL<<63)))
-#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
#define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
#define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)])
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/config.h
--- a/xen/include/asm-powerpc/config.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/config.h Fri Sep 01 13:04:02 2006 -0600
@@ -47,11 +47,13 @@ extern char __bss_start[];
/* this should be per processor, but for now */
#define CACHE_LINE_SIZE 128
+/* 256M - 64M of Xen space seems like a nice number */
+#define CONFIG_MIN_DOM0_PAGES (192 << (20 - PAGE_SHIFT))
#define CONFIG_SHADOW 1
#define CONFIG_GDB 1
#define CONFIG_SMP 1
#define CONFIG_PCI 1
-#define NR_CPUS 1
+#define NR_CPUS 16
#ifndef ELFSIZE
#define ELFSIZE 64
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/current.h
--- a/xen/include/asm-powerpc/current.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/current.h Fri Sep 01 13:04:02 2006 -0600
@@ -27,7 +27,7 @@
struct vcpu;
-register struct processor_area *parea asm("r13");
+register volatile struct processor_area *parea asm("r13");
static inline struct vcpu *get_current(void)
{
@@ -66,7 +66,7 @@ static inline struct cpu_user_regs *gues
static inline void reset_stack_and_jump(void (*f)(void))
{
- void _reset_stack_and_jump(void (*f)(void), struct cpu_user_regs *regs);
+ void _reset_stack_and_jump(void (*)(void), struct cpu_user_regs *);
struct cpu_user_regs *regs = guest_cpu_user_regs();
#ifdef TRACK_RESUME
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/domain.h
--- a/xen/include/asm-powerpc/domain.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/domain.h Fri Sep 01 13:04:02 2006 -0600
@@ -38,15 +38,14 @@ struct arch_domain {
struct page_info *rma_page;
uint rma_order;
- /* This is regular memory, only available thru translataion */
- ulong logical_base_pfn;
- ulong logical_end_pfn;
+ /* list of extents beyond RMA */
+ struct list_head extent_list;
/* I/O-port access bitmap mask. */
u8 *iobmp_mask; /* Address of IO bitmap mask, or NULL. */
uint large_page_sizes;
- char large_page_shift[4];
+ uint large_page_order[4];
} __cacheline_aligned;
struct slb_entry {
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/htab.h
--- a/xen/include/asm-powerpc/htab.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/htab.h Fri Sep 01 13:04:02 2006 -0600
@@ -133,8 +133,4 @@ struct domain_htab {
union pte *map; /* access the htab like an array */
ulong *shadow; /* idx -> logical translation array */
};
-
-struct domain;
-extern void htab_alloc(struct domain *d, uint order);
-extern void htab_free(struct domain *d);
#endif
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/mm.h Fri Sep 01 13:04:02 2006 -0600
@@ -24,6 +24,7 @@
#include <public/xen.h>
#include <xen/list.h>
#include <xen/types.h>
+#include <xen/mm.h>
#include <asm/misc.h>
#include <asm/system.h>
#include <asm/flushtlb.h>
@@ -33,7 +34,6 @@
#define memguard_unguard_range(_p,_l) ((void)0)
extern unsigned long xenheap_phys_end;
-#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
/*
* Per-page-frame information.
@@ -43,7 +43,6 @@ extern unsigned long xenheap_phys_end;
* 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
*/
#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
-#define PRtype_info "016lx"
/* XXX copy-and-paste job; re-examine me */
struct page_info
@@ -63,7 +62,7 @@ struct page_info
/* Page is in use: ((count_info & PGC_count_mask) != 0). */
struct {
/* Owner of this page (NULL if page is anonymous). */
- struct domain *_domain;
+ u32 _domain;
/* Type reference count and various PGT_xxx flags and fields. */
unsigned long type_info;
} inuse;
@@ -80,80 +79,132 @@ struct page_info
};
+struct page_extents {
+ /* Each frame can be threaded onto a doubly-linked list. */
+ struct list_head pe_list;
+
+ /* page extent */
+ struct page_info *pg;
+ uint order;
+ ulong pfn;
+};
+
/* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none (0<<29) /* no special uses of this page */
-#define PGT_l1_page_table (1<<29) /* using this page as an L1 page table? */
-#define PGT_l2_page_table (2<<29) /* using this page as an L2 page table? */
-#define PGT_l3_page_table (3<<29) /* using this page as an L3 page table? */
-#define PGT_l4_page_table (4<<29) /* using this page as an L4 page table? */
-#define PGT_gdt_page (5<<29) /* using this page in a GDT? */
-#define PGT_ldt_page (6<<29) /* using this page in an LDT? */
+#define PGT_RMA (1<<29) /* This page is an RMA page? */
#define PGT_writable_page (7<<29) /* has writable mappings of this page? */
#define PGT_type_mask (7<<29) /* Bits 29-31. */
+
+ /* Owning guest has pinned this page to its current type? */
+#define _PGT_pinned 28
+#define PGT_pinned (1U<<_PGT_pinned)
/* Has this page been validated for use as its current type? */
-#define _PGT_validated 28
+#define _PGT_validated 27
#define PGT_validated (1U<<_PGT_validated)
- /* Owning guest has pinned this page to its current type? */
-#define _PGT_pinned 27
-#define PGT_pinned (1U<<_PGT_pinned)
- /* The 10 most significant bits of virt address if this is a page table. */
-#define PGT_va_shift 17
-#define PGT_va_mask (((1U<<10)-1)<<PGT_va_shift)
+
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift 32
+#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
/* Is the back pointer still mutable (i.e. not fixed yet)? */
-#define PGT_va_mutable (((1U<<10)-1)<<PGT_va_shift)
+#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
-#define PGT_va_unknown (((1U<<10)-2)<<PGT_va_shift)
- /* 17-bit count of uses of this frame as its current type. */
-#define PGT_count_mask ((1U<<17)-1)
+#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
+
+ /* 16-bit count of uses of this frame as its current type. */
+#define PGT_count_mask ((1U<<16)-1)
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated 31
#define PGC_allocated (1U<<_PGC_allocated)
- /* 31-bit count of references to this frame. */
-#define PGC_count_mask ((1U<<31)-1)
+ /* Set on a *guest* page to mark it out-of-sync with its shadow */
+#define _PGC_out_of_sync 30
+#define PGC_out_of_sync (1U<<_PGC_out_of_sync)
+ /* Set when is using a page as a page table */
+#define _PGC_page_table 29
+#define PGC_page_table (1U<<_PGC_page_table)
+ /* 29-bit count of references to this frame. */
+#define PGC_count_mask ((1U<<29)-1)
+
+#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
+
+static inline struct domain *unpickle_domptr(u32 _domain)
+{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
+
+static inline u32 pickle_domptr(struct domain *domain)
+{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
+
+#define PRtype_info "016lx"/* should only be used for printk's */
+
+#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
+#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+
+extern struct page_info *frame_table;
+extern unsigned long max_page;
+extern unsigned long total_pages;
+void init_frametable(void);
static inline void put_page(struct page_info *page)
{
-#if 0
- int count;
-
- count = atomic_dec_return(&page->count_info);
-
- if ( unlikely((count & PGC_count_mask) == 0) )
+ u32 nx, x, y = page->count_info;
+
+ do {
+ x = y;
+ nx = x - 1;
+ }
+ while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );
+
+ if ( unlikely((nx & PGC_count_mask) == 0) ) {
+ panic("about to free page\n");
free_domheap_page(page);
-#else
- trap();
-#endif
+ }
}
static inline int get_page(struct page_info *page,
struct domain *domain)
{
-#if 0
- int count;
-
- count = atomic_inc_return(&page->count_info);
-
- if (((count & PGC_count_mask) == 0) || /* Count overflow? */
- ((count & PGC_count_mask) == 1) || /* Wasn't allocated? */
- ((page->domain != domain))) /* Wrong owner? */
- {
- atomic_dec(&page->count_info);
- return 0;
- }
-
-#else
- trap();
-#endif
+ u32 x, nx, y = page->count_info;
+ u32 d, nd = page->u.inuse._domain;
+ u32 _domain = pickle_domptr(domain);
+
+ do {
+ x = y;
+ nx = x + 1;
+ d = nd;
+ if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */
+ unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
+ unlikely(d != _domain) ) /* Wrong owner? */
+ {
+ return 0;
+ }
+ y = cmpxchg(&page->count_info, x, nx);
+ }
+ while ( unlikely(y != x) );
+
return 1;
+}
+
+extern void put_page_type(struct page_info *page);
+extern int get_page_type(struct page_info *page, unsigned long type);
+
+static inline void put_page_and_type(struct page_info *page)
+{
+ put_page_type(page);
+ put_page(page);
}
static inline int get_page_and_type(struct page_info *page,
struct domain *domain,
- u32 type)
-{
- trap();
- return 1;
+ unsigned long type)
+{
+ int rc = get_page(page, domain);
+
+ if ( likely(rc) && unlikely(!get_page_type(page, type)) )
+ {
+ put_page(page);
+ rc = 0;
+ }
+
+ return rc;
}
static inline int page_is_removable(struct page_info *page)
@@ -161,16 +212,9 @@ static inline int page_is_removable(stru
return ((page->count_info & PGC_count_mask) == 1);
}
-int get_page_type(struct page_info *page, u32 type);
-
#define set_machinetophys(_mfn, _pfn) (trap(), 0)
extern void synchronise_pagetables(unsigned long cpu_mask);
-
-static inline void put_page_and_type(struct page_info *page)
-{
- trap();
-}
/* XXX don't know what this is for */
typedef struct {
@@ -179,17 +223,10 @@ typedef struct {
} vm_assist_info_t;
extern vm_assist_info_t vm_assist_info[];
-#define page_get_owner(_p) ((_p)->u.inuse._domain)
-#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = _d)
-
#define share_xen_page_with_guest(p, d, r) do { } while (0)
#define share_xen_page_with_privileged_guests(p, r) do { } while (0)
-extern struct page_info *frame_table;
extern unsigned long frame_table_size;
-extern unsigned long max_page;
-extern unsigned long total_pages;
-void init_frametable(void);
/* hope that accesses to this will fail spectacularly */
#define machine_to_phys_mapping ((u32 *)-1UL)
@@ -199,12 +236,12 @@ extern int update_grant_va_mapping(unsig
struct domain *,
struct vcpu *);
-extern void put_page_type(struct page_info *page);
-
-#define PFN_TYPE_RMA 0
-#define PFN_TYPE_LOGICAL 1
-#define PFN_TYPE_IO 2
-extern ulong pfn2mfn(struct domain *d, long mfn, int *type);
+#define PFN_TYPE_RMA 1
+#define PFN_TYPE_LOGICAL 2
+#define PFN_TYPE_IO 3
+#define PFN_TYPE_REMOTE 4
+
+extern ulong pfn2mfn(struct domain *d, long pfn, int *type);
/* Arch-specific portion of memory_op hypercall. */
long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
@@ -221,6 +258,10 @@ static inline unsigned long gmfn_to_mfn(
#define mfn_to_gmfn(_d, mfn) (mfn)
+extern int allocate_rma(struct domain *d, unsigned int order_pages);
+extern uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages);
+extern void free_extents(struct domain *d);
+
extern int steal_page(struct domain *d, struct page_info *page,
unsigned int memflags);
diff -r 4ba098226429 -r 1bab7d65171b
xen/include/asm-powerpc/powerpc64/procarea.h
--- a/xen/include/asm-powerpc/powerpc64/procarea.h Fri Sep 01 12:52:12
2006 -0600
+++ b/xen/include/asm-powerpc/powerpc64/procarea.h Fri Sep 01 13:04:02
2006 -0600
@@ -28,6 +28,7 @@ struct gdb_state;
struct processor_area
{
+ unsigned int whoami;
struct vcpu *cur_vcpu;
void *hyp_stack_base;
ulong saved_regs[2];
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/processor.h
--- a/xen/include/asm-powerpc/processor.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/processor.h Fri Sep 01 13:04:02 2006 -0600
@@ -39,8 +39,11 @@ struct cpu_user_regs;
struct cpu_user_regs;
extern void show_registers(struct cpu_user_regs *);
extern void show_execution_state(struct cpu_user_regs *);
-extern unsigned int cpu_rma_order(void);
-extern void cpu_initialize(void);
+extern void show_backtrace(ulong sp, ulong lr, ulong pc);
+extern unsigned int cpu_extent_order(void);
+extern unsigned int cpu_default_rma_order_pages(void);
+extern uint cpu_large_page_orders(uint *sizes, uint max);
+extern void cpu_initialize(int cpuid);
extern void cpu_init_vcpu(struct vcpu *);
extern void save_cpu_sprs(struct vcpu *);
extern void load_cpu_sprs(struct vcpu *);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/shadow.h
--- a/xen/include/asm-powerpc/shadow.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/shadow.h Fri Sep 01 13:04:02 2006 -0600
@@ -13,7 +13,7 @@
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
*
* Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
*/
@@ -55,4 +55,18 @@ static inline void mark_dirty(struct dom
{
return;
}
+#define gnttab_mark_dirty(d, f) mark_dirty((d), (f))
+
+extern int shadow_domctl(struct domain *d,
+ xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
+extern unsigned int shadow_teardown(struct domain *d);
+extern unsigned int shadow_set_allocation(
+ struct domain *d, unsigned int megabytes, int *preempted);
+
+/* Return the size of the shadow pool, rounded up to the nearest MB */
+static inline unsigned int shadow_get_allocation(struct domain *d)
+{
+ return (1ULL << (d->arch.htab.order + PAGE_SHIFT)) >> 20;
+}
#endif
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/smp.h Fri Sep 01 13:04:02 2006 -0600
@@ -28,8 +28,8 @@ extern int smp_num_siblings;
/* revisit when we support SMP */
#define get_hard_smp_processor_id(i) i
-#define hard_smp_processor_id() 0
-#define raw_smp_processor_id() 0
+#define raw_smp_processor_id() (parea->whoami)
+#define hard_smp_processor_id() raw_smp_processor_id()
extern cpumask_t cpu_sibling_map[];
extern cpumask_t cpu_core_map[];
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/types.h
--- a/xen/include/asm-powerpc/types.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/types.h Fri Sep 01 13:04:02 2006 -0600
@@ -3,8 +3,18 @@
#ifndef _PPC_TYPES_H
#define _PPC_TYPES_H
+#include <xen/config.h>
+
+#if defined(__ppc__)
+#define BYTES_PER_LONG 4
+#define BITS_PER_LONG 32
+#elif defined(__PPC64__)
+#define BYTES_PER_LONG 8
+#define BITS_PER_LONG 64
+#endif
+
+#ifndef __ASSEMBLY__
typedef unsigned short umode_t;
-
/*
* __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
@@ -31,8 +41,6 @@ typedef unsigned long __u64;
#endif
#endif
-#include <xen/config.h>
-
typedef signed char s8;
typedef unsigned char u8;
@@ -45,14 +53,10 @@ typedef unsigned int u32;
#if defined(__ppc__)
typedef signed long long s64;
typedef unsigned long long u64;
-#define BYTES_PER_LONG 4
-#define BITS_PER_LONG 32
typedef unsigned int size_t;
#elif defined(__PPC64__)
typedef signed long s64;
typedef unsigned long u64;
-#define BYTES_PER_LONG 8
-#define BITS_PER_LONG 64
typedef unsigned long size_t;
#endif
@@ -66,4 +70,5 @@ typedef u64 dma64_addr_t;
typedef unsigned short xmem_bufctl_t;
+#endif /* __ASSEMBLY__ */
#endif
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/mm.h Fri Sep 01 13:04:02 2006 -0600
@@ -338,7 +338,6 @@ int check_descriptor(struct desc_struct
#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
#define INVALID_M2P_ENTRY (~0UL)
#define VALID_M2P(_e) (!((_e) & (1UL<<(BITS_PER_LONG-1))))
-#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
#define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
#define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)])
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/page.h Fri Sep 01 13:04:02 2006 -0600
@@ -300,13 +300,6 @@ void setup_idle_pagetable(void);
#define _PAGE_GNTTAB 0
#endif
-/*
- * Disallow unused flag bits plus PAT, PSE and GLOBAL.
- * Also disallow GNTTAB if we are using it for grant-table debugging.
- * Permit the NX bit if the hardware supports it.
- */
-#define BASE_DISALLOW_MASK ((0xFFFFF180U | _PAGE_GNTTAB) & ~_PAGE_NX)
-
#define __PAGE_HYPERVISOR \
(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
#define __PAGE_HYPERVISOR_NOCACHE \
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/processor.h Fri Sep 01 13:04:02 2006 -0600
@@ -288,6 +288,13 @@ static inline void write_cr0(unsigned lo
static inline void write_cr0(unsigned long val)
{
__asm__("mov %0,%%cr0": :"r" ((unsigned long)val));
+}
+
+static inline unsigned long read_cr2(void)
+{
+ unsigned long __cr2;
+ __asm__("mov %%cr2,%0\n\t" :"=r" (__cr2));
+ return __cr2;
}
static inline unsigned long read_cr4(void)
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page-2level.h
--- a/xen/include/asm-x86/x86_32/page-2level.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_32/page-2level.h Fri Sep 01 13:04:02 2006 -0600
@@ -53,7 +53,4 @@ typedef l2_pgentry_t root_pgentry_t;
#define get_pte_flags(x) ((int)(x) & 0xFFF)
#define put_pte_flags(x) ((intpte_t)((x) & 0xFFF))
-#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
-
#endif /* __X86_32_PAGE_2LEVEL_H__ */
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_32/page-3level.h Fri Sep 01 13:04:02 2006 -0600
@@ -66,8 +66,6 @@ typedef l3_pgentry_t root_pgentry_t;
#define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
-#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
#define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */
#endif /* __X86_32_PAGE_3LEVEL_H__ */
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_32/page.h Fri Sep 01 13:04:02 2006 -0600
@@ -26,6 +26,15 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
#define GRANT_PTE_FLAGS \
(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB)
+/*
+ * Disallow unused flag bits plus PAT, PSE and GLOBAL.
+ * Permit the NX bit if the hardware supports it.
+ */
+#define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX)
+
+#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+
#endif /* __X86_32_PAGE_H__ */
/*
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_64/page.h Fri Sep 01 13:04:02 2006 -0600
@@ -75,8 +75,15 @@ typedef l4_pgentry_t root_pgentry_t;
#define _PAGE_NX_BIT (1U<<23)
#define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0U)
-#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
+/*
+ * Disallow unused flag bits plus PAT, PSE and GLOBAL.
+ * Permit the NX bit if the hardware supports it.
+ * Note that range [62:52] is available for software use on x86/64.
+ */
+#define BASE_DISALLOW_MASK (0xFF000180U & ~_PAGE_NX)
+
+#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
#define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
#define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-ia64.h Fri Sep 01 13:04:02 2006 -0600
@@ -18,15 +18,12 @@
#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name
#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
#ifdef __XEN_TOOLS__
#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
#endif
#ifndef __ASSEMBLY__
-typedef uint64_t uint64_aligned_t;
-
/* Guest handles for primitive C types. */
__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-powerpc.h
--- a/xen/include/public/arch-powerpc.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-powerpc.h Fri Sep 01 13:04:02 2006 -0600
@@ -29,7 +29,6 @@
#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name
#define set_xen_guest_handle(hnd, val) \
do { \
if (sizeof ((hnd).__pad)) \
@@ -42,8 +41,6 @@
#endif
#ifndef __ASSEMBLY__
-typedef uint64_t uint64_aligned_t;
-
/* Guest handles for primitive C types. */
__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-x86_32.h Fri Sep 01 13:04:02 2006 -0600
@@ -28,14 +28,7 @@
#endif
/* Structural guest handles introduced in 0x00030201. */
-#if (defined(__XEN__) || defined(__XEN_TOOLS__)) && !defined(__ASSEMBLY__)
-typedef uint64_t __attribute__((aligned(8))) uint64_aligned_t;
-#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
- typedef struct { type *p; } \
- __guest_handle_ ## name; \
- typedef struct { union { type *p; uint64_aligned_t q; }; } \
- __guest_handle_64_ ## name
-#elif __XEN_INTERFACE_VERSION__ >= 0x00030201
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
typedef struct { type *p; } __guest_handle_ ## name
#else
@@ -45,15 +38,9 @@ typedef uint64_t __attribute__((aligned(
#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
+#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
#ifdef __XEN_TOOLS__
#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
-#define set_xen_guest_handle(hnd, val) \
- do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \
- (hnd).p = val; \
- } while ( 0 )
-#else
-#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
#endif
#ifndef __ASSEMBLY__
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-x86_64.h Fri Sep 01 13:04:02 2006 -0600
@@ -39,15 +39,12 @@
#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name
#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
#ifdef __XEN_TOOLS__
#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
#endif
#ifndef __ASSEMBLY__
-typedef uint64_t uint64_aligned_t;
-
/* Guest handles for primitive C types. */
__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/domctl.h Fri Sep 01 13:04:02 2006 -0600
@@ -16,12 +16,10 @@
#include "xen.h"
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000001
-
-#define uint64_t uint64_aligned_t
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000003
struct xenctl_cpumap {
- XEN_GUEST_HANDLE_64(uint8_t) bitmap;
+ XEN_GUEST_HANDLE(uint8_t) bitmap;
uint32_t nr_cpus;
};
@@ -72,8 +70,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdo
#define XEN_DOMCTL_getmemlist 6
struct xen_domctl_getmemlist {
/* IN variables. */
+ /* Max entries to write to output buffer. */
uint64_t max_pfns;
- XEN_GUEST_HANDLE_64(ulong) buffer;
+ /* Start index in guest's page list. */
+ uint64_t start_pfn;
+ XEN_GUEST_HANDLE(xen_pfn_t) buffer;
/* OUT variables. */
uint64_t num_pfns;
};
@@ -110,7 +111,7 @@ struct xen_domctl_getpageframeinfo2 {
/* IN variables. */
uint64_t num;
/* IN/OUT variables. */
- XEN_GUEST_HANDLE_64(ulong) array;
+ XEN_GUEST_HANDLE(ulong) array;
};
typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
@@ -184,7 +185,7 @@ struct xen_domctl_shadow_op {
uint32_t mb; /* Shadow memory allocation in MB */
/* OP_PEEK / OP_CLEAN */
- XEN_GUEST_HANDLE_64(ulong) dirty_bitmap;
+ XEN_GUEST_HANDLE(ulong) dirty_bitmap;
uint64_t pages; /* Size of buffer. Updated with actual size. */
struct xen_domctl_shadow_op_stats stats;
};
@@ -204,8 +205,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_m
#define XEN_DOMCTL_setvcpucontext 12
#define XEN_DOMCTL_getvcpucontext 13
struct xen_domctl_vcpucontext {
- uint32_t vcpu; /* IN */
- XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
+ uint32_t vcpu; /* IN */
+ XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; /* IN/OUT */
};
typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
@@ -378,8 +379,6 @@ typedef struct xen_domctl xen_domctl_t;
typedef struct xen_domctl xen_domctl_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
-#undef uint64_t
-
#endif /* __XEN_PUBLIC_DOMCTL_H__ */
/*
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/sysctl.h Fri Sep 01 13:04:02 2006 -0600
@@ -16,9 +16,7 @@
#include "xen.h"
#include "domctl.h"
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000001
-
-#define uint64_t uint64_aligned_t
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000002
/*
* Read console content from Xen buffer ring.
@@ -26,8 +24,8 @@
#define XEN_SYSCTL_readconsole 1
struct xen_sysctl_readconsole {
/* IN variables. */
- uint32_t clear; /* Non-zero -> clear after reading. */
- XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */
+ uint32_t clear; /* Non-zero -> clear after reading. */
+ XEN_GUEST_HANDLE(char) buffer; /* Buffer start */
/* IN/OUT variables. */
uint32_t count; /* In: Buffer size; Out: Used buffer size */
};
@@ -105,9 +103,9 @@ struct xen_sysctl_perfc_op {
uint32_t nr_counters; /* number of counters description */
uint32_t nr_vals; /* number of values */
/* counter information (or NULL) */
- XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc;
+ XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc;
/* counter values (or NULL) */
- XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
+ XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val;
};
typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
@@ -117,7 +115,7 @@ struct xen_sysctl_getdomaininfolist {
/* IN variables. */
domid_t first_domain;
uint32_t max_domains;
- XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer;
+ XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t) buffer;
/* OUT variables. */
uint32_t num_domains;
};
@@ -140,8 +138,6 @@ typedef struct xen_sysctl xen_sysctl_t;
typedef struct xen_sysctl xen_sysctl_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t);
-#undef uint64_t
-
#endif /* __XEN_PUBLIC_SYSCTL_H__ */
/*
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/xen.h
--- a/xen/include/public/xen.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/xen.h Fri Sep 01 13:04:02 2006 -0600
@@ -63,6 +63,7 @@
#define __HYPERVISOR_hvm_op 34
#define __HYPERVISOR_sysctl 35
#define __HYPERVISOR_domctl 36
+#define __HYPERVISOR_kexec_op 37
/* Architecture-specific hypercall definitions. */
#define __HYPERVISOR_arch_0 48
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/FlatDeviceTree.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/FlatDeviceTree.py Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,323 @@
+#!/usr/bin/env python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Copyright (C) IBM Corp. 2006
+#
+# Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+
+import os
+import sys
+import struct
+import stat
+import re
+
+_OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning
+_OF_DT_BEGIN_NODE = 0x1
+_OF_DT_END_NODE = 0x2
+_OF_DT_PROP = 0x3
+_OF_DT_END = 0x9
+
+def _bincat(seq, separator=''):
+ '''Concatenate the contents of seq into a bytestream.'''
+ strs = []
+ for item in seq:
+ if type(item) == type(0):
+ strs.append(struct.pack(">I", item))
+ else:
+ try:
+ strs.append(item.to_bin())
+ except AttributeError, e:
+ strs.append(item)
+ return separator.join(strs)
+
+def _alignup(val, alignment):
+ return (val + alignment - 1) & ~(alignment - 1)
+
+def _pad(buf, alignment):
+ '''Pad bytestream with NULLs to specified alignment.'''
+ padlen = _alignup(len(buf), alignment)
+ return buf + '\0' * (padlen - len(buf))
+ # not present in Python 2.3:
+ #return buf.ljust(_padlen, '\0')
+
+def _indent(item):
+ indented = []
+ for line in str(item).splitlines(True):
+ indented.append(' ' + line)
+ return ''.join(indented)
+
+class _Property:
+ _nonprint = re.compile('[\000-\037\200-\377]')
+ def __init__(self, node, name, value):
+ self.node = node
+ self.value = value
+ self.name = name
+ self.node.tree.stradd(name)
+
+ def __str__(self):
+ result = self.name
+ if self.value:
+ searchtext = self.value
+ # it's ok for a string to end in NULL
+ if searchtext.find('\000') == len(searchtext)-1:
+ searchtext = searchtext[:-1]
+ m = self._nonprint.search(searchtext)
+ if m:
+ bytes = struct.unpack("B" * len(self.value), self.value)
+ hexbytes = [ '%02x' % b for b in bytes ]
+ words = []
+ for i in range(0, len(self.value), 4):
+ words.append(''.join(hexbytes[i:i+4]))
+ v = '<' + ' '.join(words) + '>'
+ else:
+ v = '"%s"' % self.value
+ result += ': ' + v
+ return result
+
+ def to_bin(self):
+ offset = self.node.tree.stroffset(self.name)
+ return struct.pack('>III', _OF_DT_PROP, len(self.value), offset) \
+ + _pad(self.value, 4)
+
+class _Node:
+ def __init__(self, tree, name):
+ self.tree = tree
+ self.name = name
+ self.props = {}
+ self.children = {}
+ self.phandle = 0
+
+ def __str__(self):
+ propstrs = [ _indent(prop) for prop in self.props.values() ]
+ childstrs = [ _indent(child) for child in self.children.values() ]
+ return '%s:\n%s\n%s' % (self.name, '\n'.join(propstrs),
+ '\n'.join(childstrs))
+
+ def to_bin(self):
+ name = _pad(self.name + '\0', 4)
+ return struct.pack('>I', _OF_DT_BEGIN_NODE) + \
+ name + \
+ _bincat(self.props.values()) + \
+ _bincat(self.children.values()) + \
+ struct.pack('>I', _OF_DT_END_NODE)
+
+ def addprop(self, propname, *cells):
+ '''setprop with duplicate error-checking.'''
+ if propname in self.props:
+            raise AttributeError('%s/%s already exists' % (self.name, propname))
+ self.setprop(propname, *cells)
+
+ def setprop(self, propname, *cells):
+ self.props[propname] = _Property(self, propname, _bincat(cells))
+
+ def addnode(self, nodename):
+ '''newnode with duplicate error-checking.'''
+ if nodename in self.children:
+            raise AttributeError('%s/%s already exists' % (self.name, nodename))
+ return self.newnode(nodename)
+
+ def newnode(self, nodename):
+ node = _Node(self.tree, nodename)
+ self.children[nodename] = node
+ return node
+
+ def getprop(self, propname):
+ return self.props[propname]
+
+ def getchild(self, nodename):
+ return self.children[nodename]
+
+ def get_phandle(self):
+ if self.phandle:
+ return self.phandle
+ self.phandle = self.tree.alloc_phandle()
+ self.addprop('linux,phandle', self.phandle)
+ return self.phandle
+
+class _Header:
+ def __init__(self):
+ self.magic = 0
+ self.totalsize = 0
+ self.off_dt_struct = 0
+ self.off_dt_strings = 0
+ self.off_mem_rsvmap = 0
+ self.version = 0
+ self.last_comp_version = 0
+ self.boot_cpuid_phys = 0
+ self.size_dt_strings = 0
+ def to_bin(self):
+ return struct.pack('>9I',
+ self.magic,
+ self.totalsize,
+ self.off_dt_struct,
+ self.off_dt_strings,
+ self.off_mem_rsvmap,
+ self.version,
+ self.last_comp_version,
+ self.boot_cpuid_phys,
+ self.size_dt_strings)
+
+class _StringBlock:
+ def __init__(self):
+ self.table = []
+ def to_bin(self):
+ return _bincat(self.table, '\0') + '\0'
+ def add(self, str):
+ self.table.append(str)
+ def getoffset(self, str):
+ return self.to_bin().index(str + '\0')
+
+class Tree(_Node):
+ def __init__(self):
+ self.last_phandle = 0
+ self.strings = _StringBlock()
+ self.reserved = [(0, 0)]
+ _Node.__init__(self, self, '\0')
+
+ def alloc_phandle(self):
+ self.last_phandle += 1
+ return self.last_phandle
+
+ def stradd(self, str):
+ return self.strings.add(str)
+
+ def stroffset(self, str):
+ return self.strings.getoffset(str)
+
+ def reserve(self, start, len):
+ self.reserved.insert(0, (start, len))
+
+ def to_bin(self):
+ # layout:
+ # header
+ # reservation map
+ # string block
+ # data block
+
+ datablock = _Node.to_bin(self)
+
+ r = [ struct.pack('>QQ', rsrv[0], rsrv[1]) for rsrv in self.reserved ]
+ reserved = _bincat(r)
+
+ strblock = _pad(self.strings.to_bin(), 4)
+ strblocklen = len(strblock)
+
+ header = _Header()
+ header.magic = _OF_DT_HEADER
+ header.off_mem_rsvmap = _alignup(len(header.to_bin()), 8)
+ header.off_dt_strings = header.off_mem_rsvmap + len(reserved)
+ header.off_dt_struct = header.off_dt_strings + strblocklen
+ header.version = 0x10
+ header.last_comp_version = 0x10
+ header.boot_cpuid_phys = 0
+ header.size_dt_strings = strblocklen
+
+ payload = reserved + \
+ strblock + \
+ datablock + \
+ struct.pack('>I', _OF_DT_END)
+ header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8)
+ return _pad(header.to_bin(), 8) + payload
+
+_host_devtree_root = '/proc/device-tree'
+def _getprop(propname):
+ '''Extract a property from the system's device tree.'''
+ f = file(os.path.join(_host_devtree_root, propname), 'r')
+ data = f.read()
+ f.close()
+ return data
+
+def _copynode(node, dirpath, propfilter):
+ '''Extract all properties from a node in the system's device tree.'''
+ dirents = os.listdir(dirpath)
+ for dirent in dirents:
+ fullpath = os.path.join(dirpath, dirent)
+ st = os.lstat(fullpath)
+ if stat.S_ISDIR(st.st_mode):
+ child = node.addnode(dirent)
+ _copytree(child, fullpath, propfilter)
+ elif stat.S_ISREG(st.st_mode) and propfilter(fullpath):
+ node.addprop(dirent, _getprop(fullpath))
+
+def _copytree(node, dirpath, propfilter):
+ path = os.path.join(_host_devtree_root, dirpath)
+ _copynode(node, path, propfilter)
+
+def build(imghandler):
+ '''Construct a device tree by combining the domain's configuration and
+ the host's device tree.'''
+ root = Tree()
+
+ # 4 pages: start_info, console, store, shared_info
+ root.reserve(0x3ffc000, 0x4000)
+
+ root.addprop('device_type', 'chrp-but-not-really\0')
+ root.addprop('#size-cells', 2)
+ root.addprop('#address-cells', 2)
+ root.addprop('model', 'Momentum,Maple-D\0')
+ root.addprop('compatible', 'Momentum,Maple\0')
+
+ xen = root.addnode('xen')
+ xen.addprop('start-info', 0, 0x3ffc000, 0, 0x1000)
+ xen.addprop('version', 'Xen-3.0-unstable\0')
+ xen.addprop('reg', 0, imghandler.vm.domid, 0, 0)
+ xen.addprop('domain-name', imghandler.vm.getName() + '\0')
+ xencons = xen.addnode('console')
+ xencons.addprop('interrupts', 1, 0)
+
+ # XXX split out RMA node
+ mem = root.addnode('memory@0')
+ totalmem = imghandler.vm.getMemoryTarget() * 1024
+ mem.addprop('reg', 0, 0, 0, totalmem)
+ mem.addprop('device_type', 'memory\0')
+
+ cpus = root.addnode('cpus')
+ cpus.addprop('smp-enabled')
+ cpus.addprop('#size-cells', 0)
+ cpus.addprop('#address-cells', 1)
+
+ # Copy all properties the system firmware gave us, except for 'linux,'
+ # properties, from 'cpus/@0', once for every vcpu. Hopefully all cpus are
+ # identical...
+ cpu0 = None
+ def _nolinuxprops(fullpath):
+ return not os.path.basename(fullpath).startswith('linux,')
+ for i in range(imghandler.vm.getVCpuCount()):
+ cpu = cpus.addnode('PowerPC,970@0')
+ _copytree(cpu, 'cpus/PowerPC,970@0', _nolinuxprops)
+ # and then overwrite what we need to
+ pft_size = imghandler.vm.info.get('pft-size', 0x14)
+ cpu.setprop('ibm,pft-size', 0, pft_size)
+
+ # set default CPU
+ if cpu0 == None:
+ cpu0 = cpu
+
+ chosen = root.addnode('chosen')
+ chosen.addprop('cpu', cpu0.get_phandle())
+ chosen.addprop('memory', mem.get_phandle())
+ chosen.addprop('linux,stdout-path', '/xen/console\0')
+ chosen.addprop('interrupt-controller', xen.get_phandle())
+ chosen.addprop('bootargs', imghandler.cmdline + '\0')
+ # xc_linux_load.c will overwrite these 64-bit properties later
+ chosen.addprop('linux,initrd-start', 0, 0)
+ chosen.addprop('linux,initrd-end', 0, 0)
+
+ if 1:
+ f = file('/tmp/domU.dtb', 'w')
+ f.write(root.to_bin())
+ f.close()
+
+ return root
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/arch.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/arch.py Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Copyright (C) IBM Corp. 2006
+#
+# Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+
+import os
+
+_types = {
+ "i386": "x86",
+ "i486": "x86",
+ "i586": "x86",
+ "i686": "x86",
+ "x86_64": "x86",
+ "ia64": "ia64",
+ "ppc": "powerpc",
+ "ppc64": "powerpc",
+}
+type = _types.get(os.uname()[4], "unknown")
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/backtrace.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/backtrace.c Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,193 @@
+/*
+ * Routines providing a simple monitor for use on the PowerMac.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/console.h>
+#include <xen/sched.h>
+#include <xen/symbols.h>
+
+static char namebuf[KSYM_NAME_LEN+1];
+
+/* Shamelessly lifted from Linux Xmon try to keep pristene */
+#ifdef __powerpc64__
+#define LRSAVE_OFFSET 0x10
+#define REG_FRAME_MARKER 0x7265677368657265ul /* "regshere" */
+#define MARKER_OFFSET 0x60
+#define REGS_OFFSET 0x70
+#define REG "%016lX"
+#else
+#define LRSAVE_OFFSET 4
+#define REG_FRAME_MARKER 0x72656773
+#define MARKER_OFFSET 8
+#define REGS_OFFSET 16
+#define REG "%08lX"
+#endif
+
+#define TRAP(regs) ((regs)->entry_vector & ~0xF)
+static int xmon_depth_to_print = 64;
+
+/* Very cheap human name for vector lookup. */
+static
+const char *getvecname(unsigned long vec)
+{
+ char *ret;
+
+ switch (vec) {
+ case 0x100: ret = "(System Reset)"; break;
+ case 0x200: ret = "(Machine Check)"; break;
+ case 0x300: ret = "(Data Access)"; break;
+ case 0x380: ret = "(Data SLB Access)"; break;
+ case 0x400: ret = "(Instruction Access)"; break;
+ case 0x480: ret = "(Instruction SLB Access)"; break;
+ case 0x500: ret = "(Hardware Interrupt)"; break;
+ case 0x600: ret = "(Alignment)"; break;
+ case 0x700: ret = "(Program Check)"; break;
+ case 0x800: ret = "(FPU Unavailable)"; break;
+ case 0x900: ret = "(Decrementer)"; break;
+ case 0xc00: ret = "(System Call)"; break;
+ case 0xd00: ret = "(Single Step)"; break;
+ case 0xf00: ret = "(Performance Monitor)"; break;
+ case 0xf20: ret = "(Altivec Unavailable)"; break;
+ case 0x1300: ret = "(Instruction Breakpoint)"; break;
+ default: ret = "";
+ }
+ return ret;
+}
+
+static int mread(unsigned long adrs, void *buf, int size)
+{
+ memcpy(buf, (void *)adrs, size);
+ return size;
+}
+
+static void get_function_bounds(unsigned long pc, unsigned long *startp,
+ unsigned long *endp)
+{
+ unsigned long size, offset;
+ const char *name;
+
+ *startp = *endp = 0;
+ if (pc == 0)
+ return;
+
+ name = symbols_lookup(pc, &size, &offset, namebuf);
+ if (name != NULL) {
+ *startp = pc - offset;
+ *endp = pc - offset + size;
+ }
+}
+
+/* Print an address in numeric and symbolic form (if possible) */
+static void xmon_print_symbol(unsigned long address, const char *mid,
+ const char *after)
+{
+ const char *name = NULL;
+ unsigned long offset, size;
+
+ printf(REG, address);
+
+ name = symbols_lookup(address, &size, &offset, namebuf);
+ if (name) {
+ printf("%s%s+%#lx/%#lx", mid, name, offset, size);
+ }
+ printf("%s", after);
+}
+
+static void backtrace(
+ unsigned long sp, unsigned long lr, unsigned long pc)
+{
+ unsigned long ip;
+ unsigned long newsp;
+ unsigned long marker;
+ int count = 0;
+ struct cpu_user_regs regs;
+
+ do {
+ if (sp > xenheap_phys_end) {
+ if (sp != 0)
+ printf("SP (%lx) is not in xen space\n", sp);
+ break;
+ }
+
+ if (!mread(sp + LRSAVE_OFFSET, &ip, sizeof(unsigned long))
+ || !mread(sp, &newsp, sizeof(unsigned long))) {
+ printf("Couldn't read stack frame at %lx\n", sp);
+ break;
+ }
+
+ /*
+ * For the first stack frame, try to work out if
+ * LR and/or the saved LR value in the bottommost
+ * stack frame are valid.
+ */
+ if ((pc | lr) != 0) {
+ unsigned long fnstart, fnend;
+ unsigned long nextip;
+ int printip = 1;
+
+ get_function_bounds(pc, &fnstart, &fnend);
+ nextip = 0;
+ if (newsp > sp)
+ mread(newsp + LRSAVE_OFFSET, &nextip,
+ sizeof(unsigned long));
+ if (lr == ip) {
+ if (lr >= xenheap_phys_end
+ || (fnstart <= lr && lr < fnend))
+ printip = 0;
+ } else if (lr == nextip) {
+ printip = 0;
+ } else if (lr < xenheap_phys_end
+ && !(fnstart <= lr && lr < fnend)) {
+ printf("[link register ] ");
+ xmon_print_symbol(lr, " ", "\n");
+ }
+ if (printip) {
+ printf("["REG"] ", sp);
+ xmon_print_symbol(ip, " ", " (unreliable)\n");
+ }
+ pc = lr = 0;
+
+ } else {
+ printf("["REG"] ", sp);
+ xmon_print_symbol(ip, " ", "\n");
+ }
+
+ /* Look for "regshere" marker to see if this is
+ an exception frame. */
+ if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long))
+ && marker == REG_FRAME_MARKER) {
+ if (mread(sp + REGS_OFFSET, &regs, sizeof(regs))
+ != sizeof(regs)) {
+ printf("Couldn't read registers at %lx\n",
+ sp + REGS_OFFSET);
+ break;
+ }
+ printf("--- Exception: %x %s at ", regs.entry_vector,
+ getvecname(TRAP(&regs)));
+ pc = regs.pc;
+ lr = regs.lr;
+ xmon_print_symbol(pc, " ", "\n");
+ }
+
+ if (newsp == 0)
+ break;
+
+ sp = newsp;
+ } while (count++ < xmon_depth_to_print);
+}
+
+void show_backtrace(ulong sp, ulong lr, ulong pc)
+{
+ console_start_sync();
+ backtrace(sp, lr, pc);
+ console_end_sync();
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/memory.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/memory.c Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,206 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Dan Poff <poff@xxxxxxxxxx>
+ * Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+#include <xen/sched.h>
+#include <xen/mm.h>
+#include "of-devtree.h"
+#include "oftree.h"
+
+unsigned long xenheap_phys_end;
+struct membuf {
+ ulong start;
+ ulong size;
+};
+
+typedef void (*walk_mem_fn)(struct membuf *, uint);
+
+static ulong free_xenheap(ulong start, ulong end)
+{
+ start = ALIGN_UP(start, PAGE_SIZE);
+ end = ALIGN_DOWN(end, PAGE_SIZE);
+
+ printk("%s: 0x%lx - 0x%lx\n", __func__, start, end);
+
+ if (oftree <= end && oftree >= start) {
+ printk("%s: Go around the devtree: 0x%lx - 0x%lx\n",
+ __func__, oftree, oftree_end);
+ init_xenheap_pages(start, ALIGN_DOWN(oftree, PAGE_SIZE));
+ init_xenheap_pages(ALIGN_UP(oftree_end, PAGE_SIZE), end);
+ } else {
+ init_xenheap_pages(start, end);
+ }
+
+ return ALIGN_UP(end, PAGE_SIZE);
+}
+
+static void set_max_page(struct membuf *mb, uint entries)
+{
+ int i;
+
+ for (i = 0; i < entries; i++) {
+ ulong end_page;
+
+ end_page = (mb[i].start + mb[i].size) >> PAGE_SHIFT;
+
+ if (end_page > max_page)
+ max_page = end_page;
+ }
+}
+
+/* mark all memory from modules onward as unused */
+static void heap_init(struct membuf *mb, uint entries)
+{
+ int i;
+ ulong start_blk;
+ ulong end_blk = 0;
+
+ for (i = 0; i < entries; i++) {
+ start_blk = mb[i].start;
+ end_blk = start_blk + mb[i].size;
+
+ if (start_blk < xenheap_phys_end) {
+ if (xenheap_phys_end > end_blk) {
+ panic("xenheap spans LMB\n");
+ }
+ if (xenheap_phys_end == end_blk)
+ continue;
+
+ start_blk = xenheap_phys_end;
+ }
+
+ init_boot_pages(start_blk, end_blk);
+ total_pages += (end_blk - start_blk) >> PAGE_SHIFT;
+ }
+}
+
+static void ofd_walk_mem(void *m, walk_mem_fn fn)
+{
+ ofdn_t n;
+ uint p_len;
+ struct membuf mb[8];
+ static char name[] = "memory";
+
+ n = ofd_node_find_by_prop(m, OFD_ROOT, "device_type", name, sizeof(name));
+ while (n > 0) {
+
+ p_len = ofd_getprop(m, n, "reg", mb, sizeof (mb));
+ if (p_len <= 0) {
+ panic("ofd_getprop(): failed\n");
+ }
+ if (p_len > sizeof(mb))
+ panic("%s: buffer is not big enuff for this firmware: "
+ "0x%lx < 0x%x\n", __func__, sizeof(mb), p_len);
+
+ fn(mb, p_len / sizeof(mb[0]));
+ n = ofd_node_find_next(m, n);
+ }
+}
+
+static void setup_xenheap(module_t *mod, int mcount)
+{
+ int i;
+ ulong freemem;
+
+ freemem = ALIGN_UP((ulong)_end, PAGE_SIZE);
+
+ for (i = 0; i < mcount; i++) {
+ u32 s;
+
+ if(mod[i].mod_end == mod[i].mod_start)
+ continue;
+
+ s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE);
+
+ if (mod[i].mod_start > (ulong)_start &&
+ mod[i].mod_start < (ulong)_end) {
+ /* mod was linked in */
+ continue;
+ }
+
+ if (s < freemem)
+ panic("module addresses must assend\n");
+
+ free_xenheap(freemem, s);
+ freemem = ALIGN_UP(mod[i].mod_end, PAGE_SIZE);
+
+ }
+
+ /* the rest of the xenheap, starting at the end of modules */
+ free_xenheap(freemem, xenheap_phys_end);
+}
+
+void memory_init(module_t *mod, int mcount)
+{
+ ulong eomem;
+ ulong heap_start, heap_size;
+
+ printk("Physical RAM map:\n");
+
+ /* lets find out how much memory there is and set max_page */
+ max_page = 0;
+ ofd_walk_mem((void *)oftree, set_max_page);
+ eomem = max_page << PAGE_SHIFT;
+
+ if (eomem == 0){
+ panic("ofd_walk_mem() failed\n");
+ }
+ printk("End of RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);
+
+ /* Architecturally the first 4 pages are exception hendlers, we
+ * will also be copying down some code there */
+ heap_start = 4 << PAGE_SHIFT;
+ if (oftree < (ulong)_start)
+ heap_start = ALIGN_UP(oftree_end, PAGE_SIZE);
+
+ heap_start = init_boot_allocator(heap_start);
+ if (heap_start > (ulong)_start) {
+ panic("space below _start (%p) is not enough memory "
+ "for heap (0x%lx)\n", _start, heap_start);
+ }
+
+ /* allow everything else to be allocated */
+ total_pages = 0;
+ ofd_walk_mem((void *)oftree, heap_init);
+ if (total_pages == 0)
+ panic("heap_init: failed");
+
+ if (total_pages > max_page)
+ panic("total_pages > max_page: 0x%lx > 0x%lx\n",
+ total_pages, max_page);
+
+ printk("total_pages: 0x%016lx\n", total_pages);
+
+ init_frametable();
+ end_boot_allocator();
+
+ /* Add memory between the beginning of the heap and the beginning
+ * of out text */
+ free_xenheap(heap_start, (ulong)_start);
+
+ heap_size = xenheap_phys_end - heap_start;
+ printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);
+
+ setup_xenheap(mod, mcount);
+
+ eomem = avail_domheap_pages();
+ printk("Domheap pages: 0x%lx %luMB (%lukB)\n", eomem,
+ (eomem << PAGE_SHIFT) >> 20,
+ (eomem << PAGE_SHIFT) >> 10);
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/ofd_fixup_memory.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/ofd_fixup_memory.c Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,107 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <public/xen.h>
+#include "of-devtree.h"
+#include "oftree.h"
+
+static char memory[] = "memory";
+
+struct mem_reg {
+ u64 addr;
+ u64 sz;
+};
+
+static void ofd_memory_clean(void *m)
+{
+ ofdn_t old;
+
+ /* Remove all old memory props */
+ do {
+ old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type",
+ memory, sizeof(memory));
+ if (old <= 0)
+ break;
+
+ ofd_node_prune(m, old);
+ } while (1);
+}
+
+static ofdn_t ofd_memory_node_create(
+ void *m, ofdn_t p, const char *ppath, const char *name,
+ const char *dt, ulong start, ulong size)
+{
+ struct mem_reg reg;
+ char path[128];
+ ulong l;
+ ofdn_t n;
+ ulong nl = strlen(name) + 1;
+ ulong dtl = strlen(dt) + 1;
+
+ l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start);
+ n = ofd_node_add(m, p, path, l + 1);
+ ofd_prop_add(m, n, "name", name, nl);
+ ofd_prop_add(m, n, "device_type", dt, dtl);
+
+ /* physical addresses usable without regard to OF */
+ reg.addr = start;
+ reg.sz = size;
+ ofd_prop_add(m, n, "reg", &reg, sizeof (reg));
+
+ return n;
+}
+
+static void ofd_memory_rma_node(void *m, struct domain *d)
+{
+ ulong size = rma_size(d->arch.rma_order);
+ ofdn_t n;
+
+ n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory, 0, size);
+ BUG_ON(n <= 0);
+}
+
+static void ofd_memory_extent_nodes(void *m, struct domain *d)
+{
+ ulong start;
+ ulong size;
+ ofdn_t n;
+ struct page_extents *pe;
+
+ list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
+
+ start = pe->pfn << PAGE_SHIFT;
+ size = 1UL << (pe->order + PAGE_SHIFT);
+
+ n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory,
+ start, size);
+
+ BUG_ON(n <= 0);
+ }
+}
+
+void ofd_memory_props(void *m, struct domain *d)
+{
+ ofd_memory_clean(m);
+ ofd_memory_rma_node(m, d);
+ ofd_memory_extent_nodes(m,d);
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/shadow.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/shadow.c Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,159 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/shadow.h>
+
+static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size)
+{
+ ulong sdr1_htabsize;
+
+ ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0);
+ ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX);
+ ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE);
+
+ sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS;
+
+ return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK));
+}
+
+static ulong htab_alloc(struct domain *d, uint order)
+{
+ ulong htab_raddr;
+ uint log_htab_bytes = order + PAGE_SHIFT;
+ uint htab_bytes = 1UL << log_htab_bytes;
+
+ /* we use xenheap pages to keep domheap pages usefull for domains */
+
+ if (order < 6)
+ order = 6; /* architectural minimum is 2^18 */
+ if (order > 34)
+ order = 34; /* architectural minimum is 2^46 */
+
+ htab_raddr = (ulong)alloc_xenheap_pages(order);
+ if (htab_raddr > 0) {
+ ASSERT((htab_raddr & (htab_bytes - 1)) == 0);
+
+ d->arch.htab.order = order;
+ d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE;
+ d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes);
+ d->arch.htab.map = (union pte *)htab_raddr;
+ }
+ return htab_raddr;
+}
+
+static void htab_free(struct domain *d)
+{
+ ulong htab_raddr = GET_HTAB(d);
+
+ free_xenheap_pages((void *)htab_raddr, d->arch.htab.order);
+}
+
+
+unsigned int shadow_teardown(struct domain *d)
+{
+ htab_free(d);
+ return 0;
+}
+
+unsigned int shadow_set_allocation(struct domain *d,
+ unsigned int megabytes,
+ int *preempted)
+{
+ unsigned int rc;
+ uint pages;
+ uint p;
+ uint order;
+ ulong addr;
+
+
+ if (d->arch.htab.order)
+ return -EBUSY;
+
+ if (megabytes == 0) {
+ /* old management tools */
+ megabytes = 1; /* 1/64th of 64M */
+ printk("%s: Fix management tools to set and get shadow/htab values\n"
+ " using %d MiB htab\n",
+ __func__, megabytes);
+ }
+ pages = megabytes << (20 - PAGE_SHIFT);
+ order = fls(pages) - 1; /* log2 truncated */
+ if (pages & ((1 << order) - 1))
+ ++order; /* round up */
+
+ addr = htab_alloc(d, order);
+
+ printk("%s: ibm,fpt-size should be: 0x%x\n", __func__,
+ d->arch.htab.log_num_ptes + LOG_PTE_SIZE);
+
+ if (addr == 0)
+ return -ENOMEM;
+
+ /* XXX make this a continuation */
+ for (p = 0; p < (1 << order); p++)
+ clear_page((void *)(addr + (p << PAGE_SHIFT)));
+
+ return rc;
+}
+
+int shadow_domctl(struct domain *d,
+ xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+{
+ if ( unlikely(d == current->domain) )
+ {
+ DPRINTK("Don't try to do a shadow op on yourself!\n");
+ return -EINVAL;
+ }
+
+ switch ( sc->op )
+ {
+ case XEN_DOMCTL_SHADOW_OP_OFF:
+ DPRINTK("Shadow is mandatory!\n");
+ return -EINVAL;
+
+ case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+ sc->mb = shadow_get_allocation(d);
+ return 0;
+
+ case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: {
+ int rc;
+ int preempted = 0;
+
+ rc = shadow_set_allocation(d, sc->mb, &preempted);
+
+ if (preempted)
+ /* Not finished. Set up to re-run the call. */
+ rc = hypercall_create_continuation(
+ __HYPERVISOR_domctl, "h", u_domctl);
+ else
+ /* Finished. Return the new allocation */
+ sc->mb = shadow_get_allocation(d);
+ return rc;
+ }
+
+ default:
+ printk("Bad shadow op %u\n", sc->op);
+ BUG();
+ return -EINVAL;
+ }
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/htab.c
--- a/xen/arch/powerpc/htab.c Fri Sep 01 12:52:12 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright (C) IBM Corp. 2005
- *
- * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-
-static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size)
-{
- ulong sdr1_htabsize;
-
- ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0);
- ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX);
- ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE);
-
- sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS;
-
- return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK));
-}
-
-void htab_alloc(struct domain *d, uint order)
-{
- ulong htab_raddr;
- ulong log_htab_bytes = order + PAGE_SHIFT;
- ulong htab_bytes = 1UL << log_htab_bytes;
-
- /* XXX use alloc_domheap_pages instead? */
- htab_raddr = (ulong)alloc_xenheap_pages(order);
- ASSERT(htab_raddr != 0);
- /* XXX check alignment guarantees */
- ASSERT((htab_raddr & (htab_bytes - 1)) == 0);
-
- /* XXX slow. move memset out to service partition? */
- memset((void *)htab_raddr, 0, htab_bytes);
-
- d->arch.htab.order = order;
- d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE;
- d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes);
- d->arch.htab.map = (union pte *)htab_raddr;
- d->arch.htab.shadow = xmalloc_array(ulong,
- 1UL << d->arch.htab.log_num_ptes);
- ASSERT(d->arch.htab.shadow != NULL);
-}
-
-void htab_free(struct domain *d)
-{
- ulong htab_raddr = GET_HTAB(d);
-
- free_xenheap_pages((void *)htab_raddr, d->arch.htab.order);
- xfree(d->arch.htab.shadow);
-}
-
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|