# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID c073ebdbde8c0f5c9437706b46c4a34f35033c0c
# Parent 9d52a66c74996a66adf5ee71a0d7f91bb880f7fb
# Parent 954f4dea9da6336aaa35d0706aed55fde7909644
merge with xen-unstable.hg
---
linux-2.6-xen-sparse/drivers/xen/net_driver_util.c | 58
linux-2.6-xen-sparse/include/asm-x86_64/e820.h | 63
linux-2.6-xen-sparse/include/xen/net_driver_util.h | 48
tools/xenstore/xenstored_proc.h | 27
.hgignore | 2
extras/mini-os/Makefile | 9
extras/mini-os/lib/printf.c | 4
extras/mini-os/lib/string.c | 4
linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile | 1
linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c | 2
linux-2.6-xen-sparse/drivers/xen/Makefile | 1
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 8
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 2
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 2
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 4
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c | 2
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 2
linux-2.6-xen-sparse/drivers/xen/console/console.c | 28
linux-2.6-xen-sparse/drivers/xen/core/Makefile | 11
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c | 185 +
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c | 31
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c | 3
linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 9
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c | 215 --
linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 4
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 31
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 56
linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c | 2
linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c | 4
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 23
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c | 6
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 8
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 4
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h | 63
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h | 42
linux-2.6-xen-sparse/include/xen/xenbus.h | 8
patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch | 13
tools/libxc/Makefile | 1
tools/libxc/xc_csched.c | 50
tools/libxc/xc_linux_build.c | 13
tools/libxc/xc_linux_restore.c | 122 -
tools/libxc/xc_private.c | 22
tools/libxc/xc_ptrace.c | 77
tools/libxc/xc_ptrace.h | 3
tools/libxc/xc_ptrace_core.c | 7
tools/libxc/xc_tbuf.c | 56
tools/libxc/xenctrl.h | 11
tools/libxc/xg_private.h | 10
tools/python/xen/lowlevel/xc/xc.c | 61
tools/python/xen/lowlevel/xs/xs.c | 11
tools/python/xen/xend/XendDomain.py | 22
tools/python/xen/xend/XendDomainInfo.py | 14
tools/python/xen/xend/balloon.py | 11
tools/python/xen/xend/server/SrvDomain.py | 14
tools/python/xen/xend/xenstore/xstransact.py | 28
tools/python/xen/xm/main.py | 45
tools/tests/test_x86_emulator.c | 67
tools/xenstore/Makefile | 8
tools/xenstore/xenstored_core.c | 7
tools/xenstore/xenstored_core.h | 8
tools/xenstore/xenstored_domain.c | 37
tools/xenstore/xenstored_linux.c | 69
xen/arch/x86/domain_build.c | 5
xen/arch/x86/hvm/hvm.c | 16
xen/arch/x86/hvm/i8254.c | 405 +--
xen/arch/x86/hvm/intercept.c | 82
xen/arch/x86/hvm/svm/intr.c | 47
xen/arch/x86/hvm/svm/svm.c | 44
xen/arch/x86/hvm/svm/vmcb.c | 14
xen/arch/x86/hvm/vmx/io.c | 62
xen/arch/x86/hvm/vmx/vmx.c | 37
xen/arch/x86/mm.c | 129 +
xen/arch/x86/traps.c | 4
xen/arch/x86/x86_emulate.c | 81
xen/common/Makefile | 1
xen/common/grant_table.c | 15
xen/common/kernel.c | 5
xen/common/sched_credit.c | 1233 ++++++++++++
xen/common/schedule.c | 5
xen/common/trace.c | 6
xen/include/asm-x86/domain.h | 12
xen/include/asm-x86/hvm/domain.h | 6
xen/include/asm-x86/hvm/svm/intr.h | 1
xen/include/asm-x86/hvm/svm/svm.h | 1
xen/include/asm-x86/hvm/vcpu.h | 3
xen/include/asm-x86/hvm/vmx/vmx.h | 1
xen/include/asm-x86/hvm/vpit.h | 67
xen/include/asm-x86/string.h | 162 -
xen/include/asm-x86/x86_emulate.h | 66
xen/include/public/io/xenbus.h | 59
xen/include/public/sched_ctl.h | 5
xen/include/xen/sched-if.h | 2
xen/include/xen/softirq.h | 13
93 files changed, 2802 insertions(+), 1546 deletions(-)
diff -r 9d52a66c7499 -r c073ebdbde8c .hgignore
--- a/.hgignore Thu May 25 15:59:18 2006 -0600
+++ b/.hgignore Fri May 26 13:41:49 2006 -0600
@@ -14,7 +14,7 @@
.*\.orig$
.*\.rej$
.*/a\.out$
-.*/cscope\.*$
+.*/cscope\..*$
^[^/]*\.bz2$
^TAGS$
^dist/.*$
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/Makefile
--- a/extras/mini-os/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/Makefile Fri May 26 13:41:49 2006 -0600
@@ -13,6 +13,7 @@ override CPPFLAGS := -Iinclude $(CPPFLAG
override CPPFLAGS := -Iinclude $(CPPFLAGS)
ASFLAGS = -D__ASSEMBLY__
+LDLIBS = -L. -lminios
LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
ifeq ($(TARGET_ARCH),x86_32)
@@ -55,11 +56,11 @@ links:
links:
[ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
-libminios.a: $(OBJS) $(HEAD)
- ar r libminios.a $(HEAD) $(OBJS)
+libminios.a: links $(OBJS) $(HEAD)
+ $(AR) r libminios.a $(HEAD) $(OBJS)
-$(TARGET): links libminios.a $(HEAD)
- $(LD) $(LDFLAGS) $(HEAD) -L. -lminios -o $@.elf
+$(TARGET): libminios.a $(HEAD)
+ $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf
gzip -f -9 -c $@.elf >$@.gz
.PHONY: clean
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/lib/printf.c
--- a/extras/mini-os/lib/printf.c Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/lib/printf.c Fri May 26 13:41:49 2006 -0600
@@ -53,6 +53,8 @@
*
* $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
*/
+
+#if !defined HAVE_LIBC
#include <os.h>
#include <types.h>
@@ -789,4 +791,4 @@ int sscanf(const char * buf, const char
return i;
}
-
+#endif
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/lib/string.c
--- a/extras/mini-os/lib/string.c Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/lib/string.c Fri May 26 13:41:49 2006 -0600
@@ -17,6 +17,8 @@
* $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
****************************************************************************
*/
+
+#if !defined HAVE_LIBC
#include <os.h>
#include <types.h>
@@ -153,3 +155,5 @@ char * strstr(const char * s1,const char
}
return NULL;
}
+
+#endif
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile	Fri May 26 13:41:49 2006 -0600
@@ -2,7 +2,6 @@ ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
obj-y += util.o
endif
-obj-$(CONFIG_XEN_IA64_DOM0_VP) += net_driver_util.o
obj-y += core/
#obj-y += char/
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Fri May 26 13:41:49 2006 -0600
@@ -329,7 +329,7 @@ out:
* Callback received when the backend's state changes.
*/
static void backend_changed(struct xenbus_device *dev,
- XenbusState backend_state)
+ enum xenbus_state backend_state)
{
struct tpm_private *tp = dev->data;
DPRINTK("\n");
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Fri May 26 13:41:49 2006 -0600
@@ -1,5 +1,4 @@
-obj-y += net_driver_util.o
obj-y += util.o
obj-y += core/
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Fri May 26 13:41:49 2006 -0600
@@ -67,7 +67,7 @@ static DECLARE_MUTEX(balloon_mutex);
* Also protects non-atomic updates of current_pages and driver_pages, and
* balloon lists.
*/
-spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(balloon_lock);
/* We aim for 'current allocation' == 'target allocation'. */
static unsigned long current_pages;
@@ -360,6 +360,12 @@ static void balloon_process(void *unused
/* Resets the Xen limit, sets new target, and kicks off processing. */
static void set_new_target(unsigned long target)
{
+ unsigned long min_target;
+
+ /* Do not allow target to reduce below 2% of maximum memory size. */
+ min_target = max_pfn / 50;
+ target = max(target, min_target);
+
/* No need for lock. Not read-modify-write updates. */
hard_limit = ~0UL;
target_pages = target;
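
A note on the clamp added above: max_pfn/50 is 2% of the machine's maximum page count, so the balloon driver now refuses to shrink a domain below that floor. A quick worked example of the arithmetic (numbers are illustrative only):

    /* With max_pfn = 262144 pages (1GB of 4kB pages):
     *   min_target = 262144 / 50 = 5242 pages, about 20MB (2% of 1GB).
     * A request of set_new_target(1000) therefore leaves
     * target_pages = 5242 rather than 1000. */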
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Fri May 26 13:41:49 2006 -0600
@@ -82,7 +82,7 @@ typedef struct {
static pending_req_t *pending_reqs;
static struct list_head pending_free;
-static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pending_free_lock);
static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
#define BLKBACK_INVALID_HANDLE (~0)
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Fri May 26 13:41:49 2006 -0600
@@ -247,7 +247,7 @@ static void backend_changed(struct xenbu
* Callback received when the frontend's state changes.
*/
static void frontend_changed(struct xenbus_device *dev,
- XenbusState frontend_state)
+ enum xenbus_state frontend_state)
{
struct backend_info *be = dev->data;
int err;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri May 26 13:41:49 2006 -0600
@@ -247,7 +247,7 @@ fail:
* Callback received when the backend's state changes.
*/
static void backend_changed(struct xenbus_device *dev,
- XenbusState backend_state)
+ enum xenbus_state backend_state)
{
struct blkfront_info *info = dev->data;
struct block_device *bd;
@@ -434,7 +434,7 @@ int blkif_release(struct inode *inode, s
have ignored this request initially, as the device was
still mounted. */
struct xenbus_device * dev = info->xbdev;
- XenbusState state = xenbus_read_driver_state(dev->otherend);
+ enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
if (state == XenbusStateClosing)
blkfront_closing(dev);
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Fri May 26 13:41:49 2006 -0600
@@ -93,7 +93,7 @@ static struct block_device_operations xl
.ioctl = blkif_ioctl,
};
-spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(blkif_io_lock);
static struct xlbd_major_info *
xlbd_alloc_major_info(int major, int minor, int index)
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Fri May 26 13:41:49 2006 -0600
@@ -138,7 +138,7 @@ typedef struct {
*/
static pending_req_t pending_reqs[MAX_PENDING_REQS];
static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pend_prod_lock);
/* NB. We use a different index type to differentiate from shared blk rings. */
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c	Fri May 26 13:41:49 2006 -0600
@@ -117,14 +117,17 @@ static int __init xencons_bufsz_setup(ch
{
unsigned int goal;
goal = simple_strtoul(str, NULL, 0);
- while (wbuf_size < goal)
- wbuf_size <<= 1;
+ if (goal) {
+ goal = roundup_pow_of_two(goal);
+ if (wbuf_size < goal)
+ wbuf_size = goal;
+ }
return 1;
}
__setup("xencons_bufsz=", xencons_bufsz_setup);
/* This lock protects accesses to the common transmit buffer. */
-static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xencons_lock);
/* Common transmit-kick routine. */
static void __xencons_tx_flush(void);
@@ -133,8 +136,7 @@ static struct tty_driver *xencons_driver
/******************** Kernel console driver ********************************/
-static void kcons_write(
- struct console *c, const char *s, unsigned int count)
+static void kcons_write(struct console *c, const char *s, unsigned int count)
{
int i = 0;
unsigned long flags;
@@ -155,14 +157,14 @@ static void kcons_write(
spin_unlock_irqrestore(&xencons_lock, flags);
}
-static void kcons_write_dom0(
- struct console *c, const char *s, unsigned int count)
-{
- int rc;
-
- while ((count > 0) &&
- ((rc = HYPERVISOR_console_io(
- CONSOLEIO_write, count, (char *)s)) > 0)) {
+static void kcons_write_dom0(struct console *c, const char *s, unsigned int count)
+{
+
+ while (count > 0) {
+ int rc;
+ rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
+ if (rc <= 0)
+ break;
count -= rc;
s += rc;
}
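
The rewritten kcons_write_dom0() above is the usual partial-write loop: HYPERVISOR_console_io() may consume fewer bytes than requested, so the remainder is retried until everything is written or the hypercall reports failure. The same shape in isolation, where do_write() is a hypothetical stand-in for the hypercall:

    /* do_write() returns the number of bytes consumed, or <= 0 on error. */
    static void write_all(const char *s, unsigned int count)
    {
        while (count > 0) {
            int rc = do_write(s, count);
            if (rc <= 0)
                break;      /* stop on error instead of spinning forever */
            count -= rc;
            s += rc;
        }
    }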
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Fri May 26 13:41:49 2006 -0600
@@ -4,8 +4,9 @@
obj-y := evtchn.o reboot.o gnttab.o features.o
-obj-$(CONFIG_PROC_FS) += xen_proc.o
-obj-$(CONFIG_NET) += skbuff.o
-obj-$(CONFIG_SMP) += smpboot.o
-obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
+obj-$(CONFIG_PROC_FS) += xen_proc.o
+obj-$(CONFIG_NET) += skbuff.o
+obj-$(CONFIG_SMP) += smpboot.o
+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o
+obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c	Fri May 26 13:41:49 2006 -0600
@@ -51,10 +51,10 @@
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
*/
-static spinlock_t irq_mapping_update_lock;
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
/* IRQ <-> event-channel mappings. */
-static int evtchn_to_irq[NR_EVENT_CHANNELS];
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ... NR_EVENT_CHANNELS-1] = -1};
/* Packed IRQ information: binding type, sub-type index, and event channel. */
static u32 irq_info[NR_IRQS];
@@ -91,13 +91,13 @@ static inline unsigned int type_from_irq
}
/* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping. */
#ifndef NR_IPIS
#define NR_IPIS 1
#endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
/* Reference counts for bindings to IRQs. */
static int irq_bindcount[NR_IRQS];
@@ -751,7 +751,9 @@ void irq_resume(void)
BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
/* Secondary CPUs must have no VIRQ or IPI bindings. */
- for (cpu = 1; cpu < NR_CPUS; cpu++) {
+ for_each_possible_cpu(cpu) {
+ if (cpu == 0)
+ continue;
for (virq = 0; virq < NR_VIRQS; virq++)
BUG_ON(per_cpu(virq_to_irq, cpu)[virq] != -1);
for (ipi = 0; ipi < NR_IPIS; ipi++)
@@ -813,25 +815,12 @@ void __init xen_init_IRQ(void)
void __init xen_init_IRQ(void)
{
int i;
- int cpu;
-
- spin_lock_init(&irq_mapping_update_lock);
init_evtchn_cpu_bindings();
- /* No VIRQ or IPI bindings. */
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- for (i = 0; i < NR_VIRQS; i++)
- per_cpu(virq_to_irq, cpu)[i] = -1;
- for (i = 0; i < NR_IPIS; i++)
- per_cpu(ipi_to_irq, cpu)[i] = -1;
- }
-
- /* No event-channel -> IRQ mappings. */
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- evtchn_to_irq[i] = -1;
- mask_evtchn(i); /* No event channels are 'live' right now. */
- }
+ /* No event channels are 'live' right now. */
+ for (i = 0; i < NR_EVENT_CHANNELS; i++)
+ mask_evtchn(i);
/* No IRQ -> event-channel mappings. */
for (i = 0; i < NR_IRQS; i++)
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Fri May 26 13:41:49 2006 -0600
@@ -38,7 +38,6 @@
#include <linux/vmalloc.h>
#include <asm/pgtable.h>
#include <xen/interface/xen.h>
-#include <asm/fixmap.h>
#include <asm/uaccess.h>
#include <xen/gnttab.h>
#include <asm/synch_bitops.h>
@@ -81,7 +80,7 @@ static grant_ref_t gnttab_list[NR_GRANT_
static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
-static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(gnttab_list_lock);
static grant_entry_t *shared = NULL;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Fri May 26 13:41:49 2006 -0600
@@ -17,6 +17,7 @@
#include <linux/kthread.h>
#include <xen/gnttab.h>
#include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
#if defined(__i386__) || defined(__x86_64__)
/*
@@ -80,14 +81,6 @@ static int shutting_down = SHUTDOWN_INVA
static int shutting_down = SHUTDOWN_INVALID;
static void __shutdown_handler(void *unused);
static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
-
-#ifdef CONFIG_SMP
-int smp_suspend(void);
-void smp_resume(void);
-#else
-#define smp_suspend() (0)
-#define smp_resume() ((void)0)
-#endif
/* Ensure we run on the idle task page tables so that we will
switch page tables before running user space. This is needed
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Fri May 26 13:41:49 2006 -0600
@@ -23,6 +23,7 @@
#include <asm/pgalloc.h>
#include <xen/evtchn.h>
#include <xen/interface/vcpu.h>
+#include <xen/cpu_hotplug.h>
#include <xen/xenbus.h>
#ifdef CONFIG_SMP_ALTERNATIVES
@@ -78,15 +79,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
#elif !defined(CONFIG_X86_IO_APIC)
unsigned int maxcpus = NR_CPUS;
#endif
-
-/*
- * Set of CPUs that remote admin software will allow us to bring online.
- * Notified to us via xenbus.
- */
-static cpumask_t xenbus_allowed_cpumask;
-
-/* Set of CPUs that local admin will allow us to bring online. */
-static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
void __init prefill_possible_map(void)
{
@@ -167,17 +159,17 @@ static void cpu_bringup(void)
cpu_idle();
}
-static void vcpu_prepare(int vcpu)
+void cpu_initialize_context(unsigned int cpu)
{
vcpu_guest_context_t ctxt;
- struct task_struct *idle = idle_task(vcpu);
+ struct task_struct *idle = idle_task(cpu);
#ifdef __x86_64__
- struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu];
+ struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
#else
- struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu);
-#endif
-
- if (vcpu == 0)
+ struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+#endif
+
+ if (cpu == 0)
return;
memset(&ctxt, 0, sizeof(ctxt));
@@ -226,10 +218,10 @@ static void vcpu_prepare(int vcpu)
ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
- ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu));
-#endif
-
- BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
+ ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+#endif
+
+ BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
}
void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -304,10 +296,10 @@ void __init smp_prepare_cpus(unsigned in
cpu_set(cpu, cpu_present_map);
#endif
- vcpu_prepare(cpu);
- }
-
- xenbus_allowed_cpumask = cpu_present_map;
+ cpu_initialize_context(cpu);
+ }
+
+ init_xenbus_allowed_cpumask();
/* Currently, Xen gives no dynamic NUMA/HT info. */
for (cpu = 1; cpu < NR_CPUS; cpu++) {
@@ -332,15 +324,6 @@ void __devinit smp_prepare_boot_cpu(void
cpu_online_map = cpumask_of_cpu(0);
}
-static int local_cpu_hotplug_request(void)
-{
- /*
- * We assume a CPU hotplug request comes from local admin if it is made
- * via a userspace process (i.e., one with a real mm_struct).
- */
- return (current->mm != NULL);
-}
-
#ifdef CONFIG_HOTPLUG_CPU
/*
@@ -355,141 +338,6 @@ static int __init initialize_cpu_present
}
core_initcall(initialize_cpu_present_map);
-static void vcpu_hotplug(unsigned int cpu)
-{
- int err;
- char dir[32], state[32];
-
- if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
- return;
-
- sprintf(dir, "cpu/%d", cpu);
- err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
- if (err != 1) {
- printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
- return;
- }
-
- if (strcmp(state, "online") == 0) {
- cpu_set(cpu, xenbus_allowed_cpumask);
- (void)cpu_up(cpu);
- } else if (strcmp(state, "offline") == 0) {
- cpu_clear(cpu, xenbus_allowed_cpumask);
- (void)cpu_down(cpu);
- } else {
- printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
- state, cpu);
- }
-}
-
-static void handle_vcpu_hotplug_event(
- struct xenbus_watch *watch, const char **vec, unsigned int len)
-{
- int cpu;
- char *cpustr;
- const char *node = vec[XS_WATCH_PATH];
-
- if ((cpustr = strstr(node, "cpu/")) != NULL) {
- sscanf(cpustr, "cpu/%d", &cpu);
- vcpu_hotplug(cpu);
- }
-}
-
-static int smpboot_cpu_notify(struct notifier_block *notifier,
- unsigned long action, void *hcpu)
-{
- int cpu = (long)hcpu;
-
- /*
- * We do this in a callback notifier rather than __cpu_disable()
- * because local_cpu_hotplug_request() does not work in the latter
- * as it's always executed from within a stopmachine kthread.
- */
- if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
- cpu_clear(cpu, local_allowed_cpumask);
-
- return NOTIFY_OK;
-}
-
-static int setup_cpu_watcher(struct notifier_block *notifier,
- unsigned long event, void *data)
-{
- int i;
-
- static struct xenbus_watch cpu_watch = {
- .node = "cpu",
- .callback = handle_vcpu_hotplug_event,
- .flags = XBWF_new_thread };
- (void)register_xenbus_watch(&cpu_watch);
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- for_each_cpu(i)
- vcpu_hotplug(i);
- printk(KERN_INFO "Brought up %ld CPUs\n",
- (long)num_online_cpus());
- }
-
- return NOTIFY_DONE;
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
- static struct notifier_block hotplug_cpu = {
- .notifier_call = smpboot_cpu_notify };
- static struct notifier_block xsn_cpu = {
- .notifier_call = setup_cpu_watcher };
-
- register_cpu_notifier(&hotplug_cpu);
- register_xenstore_notifier(&xsn_cpu);
-
- return 0;
-}
-
-arch_initcall(setup_vcpu_hotplug_event);
-
-int smp_suspend(void)
-{
- int i, err;
-
- lock_cpu_hotplug();
-
- /*
- * Take all other CPUs offline. We hold the hotplug mutex to
- * avoid other processes bringing up CPUs under our feet.
- */
- while (num_online_cpus() > 1) {
- unlock_cpu_hotplug();
- for_each_online_cpu(i) {
- if (i == 0)
- continue;
- err = cpu_down(i);
- if (err) {
- printk(KERN_CRIT "Failed to take all CPUs "
- "down: %d.\n", err);
- for_each_cpu(i)
- vcpu_hotplug(i);
- return err;
- }
- }
- lock_cpu_hotplug();
- }
-
- return 0;
-}
-
-void smp_resume(void)
-{
- int i;
-
- for_each_cpu(i)
- vcpu_prepare(i);
-
- unlock_cpu_hotplug();
-
- for_each_cpu(i)
- vcpu_hotplug(i);
-}
-
static void
remove_siblinginfo(int cpu)
{
@@ -536,20 +384,6 @@ void __cpu_die(unsigned int cpu)
#else /* !CONFIG_HOTPLUG_CPU */
-int smp_suspend(void)
-{
- if (num_online_cpus() > 1) {
- printk(KERN_WARNING "Can't suspend SMP guests "
- "without CONFIG_HOTPLUG_CPU\n");
- return -EOPNOTSUPP;
- }
- return 0;
-}
-
-void smp_resume(void)
-{
-}
-
int __cpu_disable(void)
{
return -ENOSYS;
@@ -566,17 +400,9 @@ int __devinit __cpu_up(unsigned int cpu)
{
int rc;
- if (local_cpu_hotplug_request()) {
- cpu_set(cpu, local_allowed_cpumask);
- if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
- printk("%s: attempt to bring up CPU %u disallowed by "
- "remote admin.\n", __FUNCTION__, cpu);
- return -EBUSY;
- }
- } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
- !cpu_isset(cpu, xenbus_allowed_cpumask)) {
- return -EBUSY;
- }
+ rc = cpu_up_is_allowed(cpu);
+ if (rc)
+ return rc;
#ifdef CONFIG_SMP_ALTERNATIVES
if (num_online_cpus() == 1)
@@ -591,8 +417,7 @@ int __devinit __cpu_up(unsigned int cpu)
cpu_set(cpu, cpu_online_map);
rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
- if (rc != 0)
- BUG();
+ BUG_ON(rc);
return 0;
}
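
The policy code deleted above (xenbus_allowed_cpumask, local_allowed_cpumask, the xenbus watch and smp_suspend/resume) moves into the new drivers/xen/core/cpu_hotplug.c, which this merge diff does not show. __cpu_up() now defers to cpu_up_is_allowed(cpu); presumably that helper consolidates the old mask checks roughly as follows (a hedged sketch, not the actual file contents):

    int cpu_up_is_allowed(unsigned int cpu)
    {
        /* A request from local admin widens the local mask, as the old
         * __cpu_up() did. */
        if (local_cpu_hotplug_request())
            cpu_set(cpu, local_allowed_cpumask);
        if (!cpu_isset(cpu, local_allowed_cpumask) ||
            !cpu_isset(cpu, xenbus_allowed_cpumask))
            return -EBUSY;
        return 0;
    }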
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Fri May 26 13:41:49 2006 -0600
@@ -99,7 +99,7 @@ static spinlock_t net_schedule_list_lock
#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;
-static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mfn_lock);
static unsigned long alloc_mfn(void)
{
@@ -691,7 +691,7 @@ static void net_tx_action(unsigned long
static void netif_idx_release(u16 pending_idx)
{
- static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(_lock);
unsigned long flags;
spin_lock_irqsave(&_lock, flags);
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Fri May 26 13:41:49 2006 -0600
@@ -17,13 +17,10 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-
#include <stdarg.h>
#include <linux/module.h>
#include <xen/xenbus.h>
-#include <xen/net_driver_util.h>
#include "common.h"
-
#if 0
#undef DPRINTK
@@ -31,22 +28,19 @@
printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
#endif
-
struct backend_info
{
struct xenbus_device *dev;
netif_t *netif;
struct xenbus_watch backend_watch;
- XenbusState frontend_state;
+ enum xenbus_state frontend_state;
};
-
static int connect_rings(struct backend_info *);
static void connect(struct backend_info *);
static void maybe_connect(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
unsigned int);
-
static int netback_remove(struct xenbus_device *dev)
{
@@ -191,7 +185,7 @@ static void backend_changed(struct xenbu
* Callback received when the frontend's state changes.
*/
static void frontend_changed(struct xenbus_device *dev,
- XenbusState frontend_state)
+ enum xenbus_state frontend_state)
{
struct backend_info *be = dev->data;
@@ -273,6 +267,27 @@ static void xen_net_read_rate(struct xen
kfree(ratestr);
}
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+ char *s, *e, *macstr;
+ int i;
+
+ macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+ if (IS_ERR(macstr))
+ return PTR_ERR(macstr);
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac[i] = simple_strtoul(s, &e, 16);
+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+ kfree(macstr);
+ return -ENOENT;
+ }
+ s = e+1;
+ }
+
+ kfree(macstr);
+ return 0;
+}
static void connect(struct backend_info *be)
{
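
The xen_net_read_mac() helper added above (and duplicated into netfront below, replacing the shared net_driver_util.c that this changeset removes) parses the xenstore "mac" node one octet at a time, insisting on ':' separators and a final NUL. A self-contained user-space rendering of the same parser, with strtoul standing in for the kernel's simple_strtoul:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define ETH_ALEN 6

    static int parse_mac(const char *s, unsigned char mac[ETH_ALEN])
    {
        char *e;
        int i;

        for (i = 0; i < ETH_ALEN; i++) {
            mac[i] = (unsigned char)strtoul(s, &e, 16);
            /* Reject empty octets and anything but ':' between them. */
            if ((s == e) || (*e != ((i == ETH_ALEN - 1) ? '\0' : ':')))
                return -ENOENT;
            s = e + 1;
        }
        return 0;
    }

    int main(void)
    {
        unsigned char mac[ETH_ALEN];
        if (parse_mac("00:16:3e:12:34:56", mac) == 0)
            printf("first %02x last %02x\n", mac[0], mac[5]);
        return 0;
    }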
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri May 26 13:41:49 2006 -0600
@@ -60,7 +60,6 @@
#include <asm/uaccess.h>
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
-#include <xen/net_driver_util.h>
#define GRANT_INVALID_REF 0
@@ -88,12 +87,6 @@ struct netfront_info {
unsigned int handle;
unsigned int evtchn, irq;
-
- /* What is the status of our connection to the remote backend? */
-#define BEST_CLOSED 0
-#define BEST_DISCONNECTED 1
-#define BEST_CONNECTED 2
- unsigned int backend_state;
/* Receive-ring batched refills. */
#define RX_MIN_TARGET 8
@@ -143,14 +136,6 @@ static inline unsigned short get_id_from
list[0] = list[id];
return id;
}
-
-#ifdef DEBUG
-static const char *be_state_name[] = {
- [BEST_CLOSED] = "closed",
- [BEST_DISCONNECTED] = "disconnected",
- [BEST_CONNECTED] = "connected",
-};
-#endif
#define DPRINTK(fmt, args...) pr_debug("netfront (%s:%d) " fmt, \
__FUNCTION__, __LINE__, ##args)
@@ -247,6 +232,27 @@ static int netfront_resume(struct xenbus
return talk_to_backend(dev, info);
}
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+ char *s, *e, *macstr;
+ int i;
+
+ macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+ if (IS_ERR(macstr))
+ return PTR_ERR(macstr);
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac[i] = simple_strtoul(s, &e, 16);
+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+ kfree(macstr);
+ return -ENOENT;
+ }
+ s = e+1;
+ }
+
+ kfree(macstr);
+ return 0;
+}
/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
@@ -342,7 +348,6 @@ static int setup_device(struct xenbus_de
}
memset(txs, 0, PAGE_SIZE);
memset(rxs, 0, PAGE_SIZE);
- info->backend_state = BEST_DISCONNECTED;
SHARED_RING_INIT(txs);
FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
@@ -384,7 +389,7 @@ static int setup_device(struct xenbus_de
* Callback received when the backend's state changes.
*/
static void backend_changed(struct xenbus_device *dev,
- XenbusState backend_state)
+ enum xenbus_state backend_state)
{
DPRINTK("\n");
@@ -465,7 +470,7 @@ static void network_tx_buf_gc(struct net
struct netfront_info *np = netdev_priv(dev);
struct sk_buff *skb;
- if (np->backend_state != BEST_CONNECTED)
+ if (unlikely(!netif_carrier_ok(dev)))
return;
do {
@@ -527,7 +532,7 @@ static void network_alloc_rx_buffers(str
struct xen_memory_reservation reservation;
grant_ref_t ref;
- if (unlikely(np->backend_state != BEST_CONNECTED))
+ if (unlikely(!netif_carrier_ok(dev)))
return;
/*
@@ -662,7 +667,7 @@ static int network_start_xmit(struct sk_
spin_lock_irq(&np->tx_lock);
- if (np->backend_state != BEST_CONNECTED) {
+ if (unlikely(!netif_carrier_ok(dev))) {
spin_unlock_irq(&np->tx_lock);
goto drop;
}
@@ -748,7 +753,7 @@ static int netif_poll(struct net_device
spin_lock(&np->rx_lock);
- if (np->backend_state != BEST_CONNECTED) {
+ if (unlikely(!netif_carrier_ok(dev))) {
spin_unlock(&np->rx_lock);
return 0;
}
@@ -1041,7 +1046,7 @@ static void network_connect(struct net_d
* domain a kick because we've probably just requeued some
* packets.
*/
- np->backend_state = BEST_CONNECTED;
+ netif_carrier_on(dev);
notify_remote_via_irq(np->irq);
network_tx_buf_gc(dev);
@@ -1055,7 +1060,7 @@ static void show_device(struct netfront_
if (np) {
IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
np->handle,
- be_state_name[np->backend_state],
+ netif_carrier_ok(np->netdev) ? "on" : "off",
netif_running(np->netdev) ? "open" : "closed",
np->evtchn,
np->tx,
@@ -1241,9 +1246,10 @@ static struct net_device * __devinit cre
}
np = netdev_priv(netdev);
- np->backend_state = BEST_CLOSED;
np->handle = handle;
np->xbdev = dev;
+
+ netif_carrier_off(netdev);
spin_lock_init(&np->tx_lock);
spin_lock_init(&np->rx_lock);
@@ -1392,7 +1398,7 @@ static void netif_disconnect_backend(str
/* Stop old i/f to prevent errors whilst we rebuild the state. */
spin_lock_irq(&info->tx_lock);
spin_lock(&info->rx_lock);
- info->backend_state = BEST_DISCONNECTED;
+ netif_carrier_off(info->netdev);
spin_unlock(&info->rx_lock);
spin_unlock_irq(&info->tx_lock);
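
The netfront changes above retire the driver-private BEST_* tri-state in favour of the stock netdevice carrier flag. The mapping is mechanical:

    /* BEST_CONNECTED                   -> netif_carrier_on(dev)
     * BEST_CLOSED / BEST_DISCONNECTED  -> netif_carrier_off(dev)
     * backend_state == BEST_CONNECTED  -> netif_carrier_ok(dev)
     * netif_carrier_* are the standard Linux link-state helpers, so the
     * network stack now sees the state of the backend connection too. */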
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c	Fri May 26 13:41:49 2006 -0600
@@ -166,7 +166,7 @@ static int pciback_attach(struct pciback
}
static void pciback_frontend_changed(struct xenbus_device *xdev,
- XenbusState fe_state)
+ enum xenbus_state fe_state)
{
struct pciback_device *pdev = xdev->data;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c	Fri May 26 13:41:49 2006 -0600
@@ -196,7 +196,7 @@ static int pcifront_try_disconnect(struc
static int pcifront_try_disconnect(struct pcifront_device *pdev)
{
int err = 0;
- XenbusState prev_state;
+ enum xenbus_state prev_state;
spin_lock(&pdev->dev_lock);
@@ -214,7 +214,7 @@ static int pcifront_try_disconnect(struc
}
static void pcifront_backend_changed(struct xenbus_device *xdev,
- XenbusState be_state)
+ enum xenbus_state be_state)
{
struct pcifront_device *pdev = xdev->data;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Fri May 26 13:41:49 2006 -0600
@@ -159,10 +159,6 @@ static int privcmd_ioctl(struct inode *i
break;
case IOCTL_PRIVCMD_MMAPBATCH: {
-#ifndef __ia64__
- mmu_update_t u;
- uint64_t ptep;
-#endif
privcmd_mmapbatch_t m;
struct vm_area_struct *vma = NULL;
unsigned long __user *p;
@@ -200,24 +196,12 @@ static int privcmd_ioctl(struct inode *i
for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
if (get_user(mfn, p))
return -EFAULT;
-#ifdef __ia64__
+
ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
- mfn, 1 << PAGE_SHIFT,
+ mfn, PAGE_SIZE,
vma->vm_page_prot, m.dom);
if (ret < 0)
- goto batch_err;
-#else
-
- ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
- if (ret)
- goto batch_err;
-
- u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
- u.ptr = ptep;
-
- if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
put_user(0xF0000000 | mfn, p);
-#endif
}
ret = 0;
@@ -283,6 +267,9 @@ static int __init privcmd_init(void)
set_bit(__HYPERVISOR_mmuext_op, hypercall_permission_map);
set_bit(__HYPERVISOR_xen_version, hypercall_permission_map);
set_bit(__HYPERVISOR_sched_op, hypercall_permission_map);
+ set_bit(__HYPERVISOR_sched_op_compat, hypercall_permission_map);
+ set_bit(__HYPERVISOR_event_channel_op_compat,
+         hypercall_permission_map);
privcmd_intf = create_xen_proc_entry("privcmd", 0400);
if (privcmd_intf != NULL)
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c	Fri May 26 13:41:49 2006 -0600
@@ -34,7 +34,7 @@ struct backend_info
/* watch front end for changes */
struct xenbus_watch backend_watch;
- XenbusState frontend_state;
+ enum xenbus_state frontend_state;
};
static void maybe_connect(struct backend_info *be);
@@ -43,7 +43,7 @@ static void backend_changed(struct xenbu
static void backend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len);
static void frontend_changed(struct xenbus_device *dev,
- XenbusState frontend_state);
+ enum xenbus_state frontend_state);
static int tpmback_remove(struct xenbus_device *dev)
{
@@ -129,7 +129,7 @@ static void backend_changed(struct xenbu
static void frontend_changed(struct xenbus_device *dev,
- XenbusState frontend_state)
+ enum xenbus_state frontend_state)
{
struct backend_info *be = dev->data;
int err;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Fri May 26 13:41:49 2006 -0600
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path2);
EXPORT_SYMBOL_GPL(xenbus_watch_path2);
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState state)
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
{
/* We check whether the state is currently set to the given value, and
if not, then the state is set. We don't want to unconditionally
@@ -269,9 +269,9 @@ int xenbus_free_evtchn(struct xenbus_dev
}
-XenbusState xenbus_read_driver_state(const char *path)
-{
- XenbusState result;
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+ enum xenbus_state result;
int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
if (err)
result = XenbusStateClosed;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Fri May 26 13:41:49 2006 -0600
@@ -284,7 +284,7 @@ static void otherend_changed(struct xenb
struct xenbus_device *dev =
container_of(watch, struct xenbus_device, otherend_watch);
struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
- XenbusState state;
+ enum xenbus_state state;
/* Protect us against watches firing on old details when the otherend
details change, say immediately after a resume. */
@@ -539,7 +539,7 @@ static int xenbus_probe_node(struct xen_
size_t stringlen;
char *tmpstring;
- XenbusState state = xenbus_read_driver_state(nodename);
+ enum xenbus_state state = xenbus_read_driver_state(nodename);
if (state != XenbusStateInitialising) {
/* Device is not new, so ignore it. This can happen if a
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Fri May 26 13:41:49 2006 -0600
@@ -75,7 +75,7 @@ struct xenbus_device {
int otherend_id;
struct xenbus_watch otherend_watch;
struct device dev;
- XenbusState state;
+ enum xenbus_state state;
void *data;
};
@@ -98,7 +98,7 @@ struct xenbus_driver {
int (*probe)(struct xenbus_device *dev,
const struct xenbus_device_id *id);
void (*otherend_changed)(struct xenbus_device *dev,
- XenbusState backend_state);
+ enum xenbus_state backend_state);
int (*remove)(struct xenbus_device *dev);
int (*suspend)(struct xenbus_device *dev);
int (*resume)(struct xenbus_device *dev);
@@ -207,7 +207,7 @@ int xenbus_watch_path2(struct xenbus_dev
* Return 0 on success, or -errno on error. On error, the device will switch
* to XenbusStateClosing, and the error will be saved in the store.
*/
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state);
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
/**
@@ -273,7 +273,7 @@ int xenbus_free_evtchn(struct xenbus_dev
* Return the state of the driver rooted at the given store path, or
* XenbusStateClosed if no state can be read.
*/
-XenbusState xenbus_read_driver_state(const char *path);
+enum xenbus_state xenbus_read_driver_state(const char *path);
/***
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/Makefile
--- a/tools/libxc/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/Makefile Fri May 26 13:41:49 2006 -0600
@@ -20,6 +20,7 @@ SRCS += xc_physdev.c
SRCS += xc_physdev.c
SRCS += xc_private.c
SRCS += xc_sedf.c
+SRCS += xc_csched.c
SRCS += xc_tbuf.c
ifeq ($(patsubst x86%,x86,$(XEN_TARGET_ARCH)),x86)
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_linux_build.c Fri May 26 13:41:49 2006 -0600
@@ -268,21 +268,10 @@ static int setup_pg_tables_pae(int xc_ha
l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
- unsigned long ppt_alloc, count, nmfn;
+ unsigned long ppt_alloc, count;
/* First allocate page for page dir. */
ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
-
- if ( page_array[ppt_alloc] > 0xfffff )
- {
- nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
- if ( nmfn == 0 )
- {
- fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
- goto error_out;
- }
- page_array[ppt_alloc] = nmfn;
- }
alloc_pt(l3tab, vl3tab, pl3tab);
vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_linux_restore.c Fri May 26 13:41:49 2006 -0600
@@ -331,25 +331,17 @@ int xc_linux_restore(int xc_handle, int
** A page table page - need to 'uncanonicalize' it, i.e.
** replace all the references to pfns with the corresponding
** mfns for the new domain.
- **
- ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
- ** so we may need to update the p2m after the main loop.
- ** Hence we defer canonicalization of L1s until then.
*/
- if(pt_levels != 3 || pagetype != L1TAB) {
-
- if(!uncanonicalize_pagetable(pagetype, page)) {
- /*
- ** Failing to uncanonicalize a page table can be ok
- ** under live migration since the pages type may have
- ** changed by now (and we'll get an update later).
- */
- DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
- pagetype >> 28, pfn, mfn);
- nraces++;
- continue;
- }
-
+ if(!uncanonicalize_pagetable(pagetype, page)) {
+ /*
+ ** Failing to uncanonicalize a page table can be ok
+ ** under live migration since the pages type may have
+ ** changed by now (and we'll get an update later).
+ */
+ DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+ pagetype >> 28, pfn, mfn);
+ nraces++;
+ continue;
}
} else if(pagetype != NOTAB) {
@@ -397,100 +389,6 @@ int xc_linux_restore(int xc_handle, int
}
DPRINTF("Received all pages (%d races)\n", nraces);
-
- if(pt_levels == 3) {
-
- /*
- ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
- ** is a little awkward and involves (a) finding all such PGDs and
- ** replacing them with 'lowmem' versions; (b) upating the p2m[]
- ** with the new info; and (c) canonicalizing all the L1s using the
- ** (potentially updated) p2m[].
- **
- ** This is relatively slow (and currently involves two passes through
- ** the pfn_type[] array), but at least seems to be correct. May wish
- ** to consider more complex approaches to optimize this later.
- */
-
- int j, k;
-
- /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
- for (i = 0; i < max_pfn; i++) {
-
- if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
-
- unsigned long new_mfn;
- uint64_t l3ptes[4];
- uint64_t *l3tab;
-
- l3tab = (uint64_t *)
- xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ, p2m[i]);
-
- for(j = 0; j < 4; j++)
- l3ptes[j] = l3tab[j];
-
- munmap(l3tab, PAGE_SIZE);
-
- if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
- ERR("Couldn't get a page below 4GB :-(");
- goto out;
- }
-
- p2m[i] = new_mfn;
- if (xc_add_mmu_update(xc_handle, mmu,
- (((unsigned long long)new_mfn)
- << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE, i)) {
- ERR("Couldn't m2p on PAE root pgdir");
- goto out;
- }
-
- l3tab = (uint64_t *)
- xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ | PROT_WRITE, p2m[i]);
-
- for(j = 0; j < 4; j++)
- l3tab[j] = l3ptes[j];
-
- munmap(l3tab, PAGE_SIZE);
-
- }
- }
-
- /* Second pass: find all L1TABs and uncanonicalize them */
- j = 0;
-
- for(i = 0; i < max_pfn; i++) {
-
- if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
- region_mfn[j] = p2m[i];
- j++;
- }
-
- if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
-
- if (!(region_base = xc_map_foreign_batch(
- xc_handle, dom, PROT_READ | PROT_WRITE,
- region_mfn, j))) {
- ERR("map batch failed");
- goto out;
- }
-
- for(k = 0; k < j; k++) {
- if(!uncanonicalize_pagetable(L1TAB,
- region_base + k*PAGE_SIZE)) {
- ERR("failed uncanonicalize pt!");
- goto out;
- }
- }
-
- munmap(region_base, j*PAGE_SIZE);
- j = 0;
- }
- }
-
- }
if (xc_finish_mmu_updates(xc_handle, mmu)) {
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_private.c Fri May 26 13:41:49 2006 -0600
@@ -430,28 +430,6 @@ int xc_version(int xc_handle, int cmd, v
return rc;
}
-unsigned long xc_make_page_below_4G(
- int xc_handle, uint32_t domid, unsigned long mfn)
-{
- unsigned long new_mfn;
-
- if ( xc_domain_memory_decrease_reservation(
- xc_handle, domid, 1, 0, &mfn) != 0 )
- {
- fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
- return 0;
- }
-
- if ( xc_domain_memory_increase_reservation(
- xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
- {
- fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
- return 0;
- }
-
- return new_mfn;
-}
-
/*
* Local variables:
* mode: C
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace.c Fri May 26 13:41:49 2006 -0600
@@ -185,7 +185,7 @@ map_domain_va_32(
void *guest_va,
int perm)
{
- unsigned long l1p, p, va = (unsigned long)guest_va;
+ unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va;
uint32_t *l2, *l1;
static void *v[MAX_VIRT_CPUS];
@@ -194,18 +194,20 @@ map_domain_va_32(
if ( l2 == NULL )
return NULL;
- l1p = to_ma(cpu, l2[l2_table_offset(va)]);
+ l2e = l2[l2_table_offset_i386(va)];
munmap(l2, PAGE_SIZE);
- if ( !(l1p & _PAGE_PRESENT) )
- return NULL;
+ if ( !(l2e & _PAGE_PRESENT) )
+ return NULL;
+ l1p = to_ma(cpu, l2e);
l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ,
l1p >> PAGE_SHIFT);
if ( l1 == NULL )
return NULL;
- p = to_ma(cpu, l1[l1_table_offset(va)]);
+ l1e = l1[l1_table_offset_i386(va)];
munmap(l1, PAGE_SIZE);
- if ( !(p & _PAGE_PRESENT) )
- return NULL;
+ if ( !(l1e & _PAGE_PRESENT) )
+ return NULL;
+ p = to_ma(cpu, l1e);
if ( v[cpu] != NULL )
munmap(v[cpu], PAGE_SIZE);
v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
@@ -223,7 +225,7 @@ map_domain_va_pae(
void *guest_va,
int perm)
{
- unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
+ unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va;
uint64_t *l3, *l2, *l1;
static void *v[MAX_VIRT_CPUS];
@@ -232,26 +234,29 @@ map_domain_va_pae(
if ( l3 == NULL )
return NULL;
- l2p = to_ma(cpu, l3[l3_table_offset_pae(va)]);
+ l3e = l3[l3_table_offset_pae(va)];
munmap(l3, PAGE_SIZE);
- if ( !(l2p & _PAGE_PRESENT) )
- return NULL;
+ if ( !(l3e & _PAGE_PRESENT) )
+ return NULL;
+ l2p = to_ma(cpu, l3e);
l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ,
l2p >> PAGE_SHIFT);
if ( l2 == NULL )
return NULL;
- l1p = to_ma(cpu, l2[l2_table_offset_pae(va)]);
+ l2e = l2[l2_table_offset_pae(va)];
munmap(l2, PAGE_SIZE);
- if ( !(l1p & _PAGE_PRESENT) )
- return NULL;
+ if ( !(l2e & _PAGE_PRESENT) )
+ return NULL;
+ l1p = to_ma(cpu, l2e);
l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p >> PAGE_SHIFT);
if ( l1 == NULL )
return NULL;
- p = to_ma(cpu, l1[l1_table_offset_pae(va)]);
+ l1e = l1[l1_table_offset_pae(va)];
munmap(l1, PAGE_SIZE);
- if ( !(p & _PAGE_PRESENT) )
- return NULL;
+ if ( !(l1e & _PAGE_PRESENT) )
+ return NULL;
+ p = to_ma(cpu, l1e);
if ( v[cpu] != NULL )
munmap(v[cpu], PAGE_SIZE);
v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
@@ -269,9 +274,10 @@ map_domain_va_64(
void *guest_va,
int perm)
{
- unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va;
+ unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned long)guest_va;
uint64_t *l4, *l3, *l2, *l1;
static void *v[MAX_VIRT_CPUS];
+
if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
return map_domain_va_32(xc_handle, cpu, guest_va, perm);
@@ -281,40 +287,41 @@ map_domain_va_64(
if ( l4 == NULL )
return NULL;
- l3p = to_ma(cpu, l4[l4_table_offset(va)]);
+ l4e = l4[l4_table_offset(va)];
munmap(l4, PAGE_SIZE);
- if ( !(l3p & _PAGE_PRESENT) )
- return NULL;
+ if ( !(l4e & _PAGE_PRESENT) )
+ return NULL;
+ l3p = to_ma(cpu, l4e);
l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ,
l3p >> PAGE_SHIFT);
if ( l3 == NULL )
return NULL;
- l2p = to_ma(cpu, l3[l3_table_offset(va)]);
+ l3e = l3[l3_table_offset(va)];
munmap(l3, PAGE_SIZE);
- if ( !(l2p & _PAGE_PRESENT) )
- return NULL;
+ if ( !(l3e & _PAGE_PRESENT) )
+ return NULL;
+ l2p = to_ma(cpu, l3e);
l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ,
l2p >> PAGE_SHIFT);
if ( l2 == NULL )
return NULL;
l1 = NULL;
- l1e = to_ma(cpu, l2[l2_table_offset(va)]);
- if ( !(l1e & _PAGE_PRESENT) )
- {
- munmap(l2, PAGE_SIZE);
- return NULL;
- }
- l1p = l1e >> PAGE_SHIFT;
- if (l1e & 0x80) { /* 2M pages */
+ l2e = l2[l2_table_offset(va)];
+ munmap(l2, PAGE_SIZE);
+ if ( !(l2e & _PAGE_PRESENT) )
+ return NULL;
+ l1p = to_ma(cpu, l2e);
+ if (l2e & 0x80) { /* 2M pages */
p = to_ma(cpu, (l1p + l1_table_offset(va)) << PAGE_SHIFT);
} else { /* 4K pages */
- l1p = to_ma(cpu, l1p);
l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm,
l1p >> PAGE_SHIFT);
- munmap(l2, PAGE_SIZE);
if ( l1 == NULL )
return NULL;
- p = to_ma(cpu, l1[l1_table_offset(va)]);
+ l1e = l1[l1_table_offset(va)];
+ if ( !(l1e & _PAGE_PRESENT) )
+ return NULL;
+ p = to_ma(cpu, l1e);
}
if ( v[cpu] != NULL )
munmap(v[cpu], PAGE_SIZE);
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace.h
--- a/tools/libxc/xc_ptrace.h Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace.h Fri May 26 13:41:49 2006 -0600
@@ -7,7 +7,6 @@
#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */
#define X86_CR0_PG 0x80000000 /* Paging (RW) */
#define BSD_PAGE_MASK (PAGE_SIZE-1)
-#define PDRSHIFT 22
#define PSL_T 0x00000100 /* trace enable bit */
#ifdef __x86_64__
@@ -162,8 +161,6 @@ struct gdb_regs {
#endif
#define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define vtopdi(va) ((va) >> PDRSHIFT)
-#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
#endif
typedef void (*thr_ev_handler_t)(long);
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace_core.c Fri May 26 13:41:49 2006 -0600
@@ -3,6 +3,7 @@
#include <sys/ptrace.h>
#include <sys/wait.h>
#include "xc_private.h"
+#include "xg_private.h"
#include "xc_ptrace.h"
#include <time.h>
@@ -54,7 +55,7 @@ map_domain_va_core(unsigned long domfd,
}
cr3_virt[cpu] = v;
}
- if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
+ if ((pde = cr3_virt[cpu][l2_table_offset_i386(va)]) == 0) /* logical address */
return NULL;
if (ctxt[cpu].flags & VGCF_HVM_GUEST)
pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -70,7 +71,7 @@ map_domain_va_core(unsigned long domfd,
return NULL;
pde_virt[cpu] = v;
}
- if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
+ if ((page = pde_virt[cpu][l1_table_offset_i386(va)]) == 0) /* logical address */
return NULL;
if (ctxt[cpu].flags & VGCF_HVM_GUEST)
page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -84,7 +85,7 @@ map_domain_va_core(unsigned long domfd,
map_mtop_offset(page_phys[cpu]));
if (v == MAP_FAILED)
{
- printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page,
vtopti(va));
+ printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page,
l1_table_offset_i386(va));
page_phys[cpu] = 0;
return NULL;
}
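
The vtopdi()/vtopti() macros removed from xc_ptrace.h earlier are replaced by the l2_table_offset_i386()/l1_table_offset_i386() macros added to xg_private.h below. For 32-bit non-PAE addresses they agree, since a 32-bit va shifted right by 22 already fits in 10 bits; a quick self-contained check:

    #include <assert.h>
    #include <stdint.h>

    /* Editor's check, not part of the patch. */
    #define vtopdi(va)               ((va) >> 22)
    #define vtopti(va)               (((va) >> 12) & 0x3ff)
    #define l2_table_offset_i386(va) (((va) >> 22) & 1023)
    #define l1_table_offset_i386(va) (((va) >> 12) & 1023)

    int main(void)
    {
        uint32_t va = 0xdeadbeef;
        assert(vtopdi(va) == l2_table_offset_i386(va)); /* masking is a no-op */
        assert(vtopti(va) == l1_table_offset_i386(va));
        return 0;
    }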
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_tbuf.c
--- a/tools/libxc/xc_tbuf.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_tbuf.c Fri May 26 13:41:49 2006 -0600
@@ -18,53 +18,57 @@
static int tbuf_enable(int xc_handle, int enable)
{
- DECLARE_DOM0_OP;
+ DECLARE_DOM0_OP;
- op.cmd = DOM0_TBUFCONTROL;
- op.interface_version = DOM0_INTERFACE_VERSION;
- if (enable)
- op.u.tbufcontrol.op = DOM0_TBUF_ENABLE;
- else
- op.u.tbufcontrol.op = DOM0_TBUF_DISABLE;
+ op.cmd = DOM0_TBUFCONTROL;
+ op.interface_version = DOM0_INTERFACE_VERSION;
+ if (enable)
+ op.u.tbufcontrol.op = DOM0_TBUF_ENABLE;
+ else
+ op.u.tbufcontrol.op = DOM0_TBUF_DISABLE;
- return xc_dom0_op(xc_handle, &op);
+ return xc_dom0_op(xc_handle, &op);
}
int xc_tbuf_set_size(int xc_handle, unsigned long size)
{
- DECLARE_DOM0_OP;
+ DECLARE_DOM0_OP;
- op.cmd = DOM0_TBUFCONTROL;
- op.interface_version = DOM0_INTERFACE_VERSION;
- op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE;
- op.u.tbufcontrol.size = size;
+ op.cmd = DOM0_TBUFCONTROL;
+ op.interface_version = DOM0_INTERFACE_VERSION;
+ op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE;
+ op.u.tbufcontrol.size = size;
- return xc_dom0_op(xc_handle, &op);
+ return xc_dom0_op(xc_handle, &op);
}
int xc_tbuf_get_size(int xc_handle, unsigned long *size)
{
- int rc;
- DECLARE_DOM0_OP;
+ int rc;
+ DECLARE_DOM0_OP;
- op.cmd = DOM0_TBUFCONTROL;
- op.interface_version = DOM0_INTERFACE_VERSION;
- op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO;
+ op.cmd = DOM0_TBUFCONTROL;
+ op.interface_version = DOM0_INTERFACE_VERSION;
+ op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO;
- rc = xc_dom0_op(xc_handle, &op);
- if (rc == 0)
- *size = op.u.tbufcontrol.size;
- return rc;
+ rc = xc_dom0_op(xc_handle, &op);
+ if (rc == 0)
+ *size = op.u.tbufcontrol.size;
+ return rc;
}
int xc_tbuf_enable(int xc_handle, size_t cnt, unsigned long *mfn,
- unsigned long *size)
+ unsigned long *size)
{
DECLARE_DOM0_OP;
int rc;
- if ( xc_tbuf_set_size(xc_handle, cnt) != 0 )
- return -1;
+ /*
+ * Ignore errors (at least for now) as we get an error if size is already
+ * set (since trace buffers cannot be reallocated). If we really have no
+ * buffers at all then tbuf_enable() will fail, so this is safe.
+ */
+ (void)xc_tbuf_set_size(xc_handle, cnt);
if ( tbuf_enable(xc_handle, 1) != 0 )
return -1;
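
The comment above records the design choice: DOM0_TBUF_SET_SIZE fails once buffers exist (trace buffers cannot be reallocated), so xc_tbuf_enable() ignores that error and relies on the enable step to report genuine failure. The resulting calling convention, roughly (xc_handle from xc_interface_open()):

    unsigned long mfn, size;

    if (xc_tbuf_enable(xc_handle, 20 /* pages */, &mfn, &size) != 0) {
        /* No trace buffers at all: even DOM0_TBUF_ENABLE failed. */
        return -1;
    }
    /* mfn and size now describe the shared trace-buffer area. */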
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xenctrl.h Fri May 26 13:41:49 2006 -0600
@@ -354,6 +354,14 @@ int xc_sedf_domain_get(int xc_handle,
uint64_t *latency, uint16_t *extratime,
uint16_t *weight);
+int xc_csched_domain_set(int xc_handle,
+ uint32_t domid,
+ struct csched_domain *sdom);
+
+int xc_csched_domain_get(int xc_handle,
+ uint32_t domid,
+ struct csched_domain *sdom);
+
typedef evtchn_status_t xc_evtchn_status_t;
/*
@@ -444,9 +452,6 @@ int xc_domain_iomem_permission(int xc_ha
unsigned long first_mfn,
unsigned long nr_mfns,
uint8_t allow_access);
-
-unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
- unsigned long mfn);
typedef dom0_perfc_desc_t xc_perfc_desc_t;
/* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xg_private.h Fri May 26 13:41:49 2006 -0600
@@ -48,6 +48,8 @@ unsigned long csum_page (void * page);
#define L2_PAGETABLE_SHIFT_PAE 21
#define L3_PAGETABLE_SHIFT_PAE 30
+#define L2_PAGETABLE_SHIFT_I386 22
+
#if defined(__i386__)
#define L1_PAGETABLE_SHIFT 12
#define L2_PAGETABLE_SHIFT 22
@@ -61,6 +63,9 @@ unsigned long csum_page (void * page);
#define L1_PAGETABLE_ENTRIES_PAE 512
#define L2_PAGETABLE_ENTRIES_PAE 512
#define L3_PAGETABLE_ENTRIES_PAE 4
+
+#define L1_PAGETABLE_ENTRIES_I386 1024
+#define L2_PAGETABLE_ENTRIES_I386 1024
#if defined(__i386__)
#define L1_PAGETABLE_ENTRIES 1024
@@ -95,6 +100,11 @@ typedef unsigned long l4_pgentry_t;
#define l3_table_offset_pae(_a) \
(((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
+#define l1_table_offset_i386(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1))
+#define l2_table_offset_i386(_a) \
+ (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1))
+
#if defined(__i386__)
#define l1_table_offset(_a) \
(((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri May 26 13:41:49 2006 -0600
@@ -716,6 +716,49 @@ static PyObject *pyxc_sedf_domain_get(Xc
"weight", weight);
}
+static PyObject *pyxc_csched_domain_set(XcObject *self,
+ PyObject *args,
+ PyObject *kwds)
+{
+ uint32_t domid;
+ uint16_t weight;
+ uint16_t cap;
+ static char *kwd_list[] = { "dom", "weight", "cap", NULL };
+ static char kwd_type[] = "I|HH";
+ struct csched_domain sdom;
+
+ weight = 0;
+ cap = (uint16_t)~0U;
+ if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list,
+ &domid, &weight, &cap) )
+ return NULL;
+
+ sdom.weight = weight;
+ sdom.cap = cap;
+
+ if ( xc_csched_domain_set(self->xc_handle, domid, &sdom) != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ Py_INCREF(zero);
+ return zero;
+}
+
+static PyObject *pyxc_csched_domain_get(XcObject *self, PyObject *args)
+{
+ uint32_t domid;
+ struct csched_domain sdom;
+
+ if( !PyArg_ParseTuple(args, "I", &domid) )
+ return NULL;
+
+ if ( xc_csched_domain_get(self->xc_handle, domid, &sdom) != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ return Py_BuildValue("{s:H,s:H}",
+ "weight", sdom.weight,
+ "cap", sdom.cap);
+}
+
static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
{
uint32_t dom;
@@ -1040,6 +1083,24 @@ static PyMethodDef pyxc_methods[] = {
" slice [long]: CPU reservation per period\n"
" latency [long]: domain's wakeup latency hint\n"
" extratime [int]: domain aware of extratime?\n"},
+
+ { "csched_domain_set",
+ (PyCFunction)pyxc_csched_domain_set,
+ METH_KEYWORDS, "\n"
+ "Set the scheduling parameters for a domain when running with the\n"
+ "SMP credit scheduler.\n"
+ " domid [int]: domain id to set\n"
+ " weight [short]: domain's scheduling weight\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
+ { "csched_domain_get",
+ (PyCFunction)pyxc_csched_domain_get,
+ METH_VARARGS, "\n"
+ "Get the scheduling parameters for a domain when running with the\n"
+ "SMP credit scheduler.\n"
+ " domid [int]: domain id to get\n"
+ "Returns: [dict]\n"
+ " weight [short]: domain's scheduling weight\n"},
{ "evtchn_alloc_unbound",
(PyCFunction)pyxc_evtchn_alloc_unbound,
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/lowlevel/xs/xs.c Fri May 26 13:41:49 2006 -0600
@@ -272,11 +272,12 @@ static PyObject *xspy_get_permissions(Xs
if (perms) {
PyObject *val = PyList_New(perms_n);
- for (i = 0; i < perms_n; i++, perms++) {
- PyObject *p = Py_BuildValue("{s:i,s:i,s:i}",
- "dom", perms->id,
- "read", perms->perms & XS_PERM_READ,
- "write",perms->perms & XS_PERM_WRITE);
+ for (i = 0; i < perms_n; i++) {
+ PyObject *p =
+ Py_BuildValue("{s:i,s:i,s:i}",
+ "dom", perms[i].id,
+ "read", perms[i].perms & XS_PERM_READ,
+ "write", perms[i].perms & XS_PERM_WRITE);
PyList_SetItem(val, i, p);
}
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/XendDomain.py Fri May 26 13:41:49 2006 -0600
@@ -522,6 +522,28 @@ class XendDomain:
except Exception, ex:
raise XendError(str(ex))
+ def domain_csched_get(self, domid):
+ """Get credit scheduler parameters for a domain.
+ """
+ dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+ if not dominfo:
+ raise XendInvalidDomain(str(domid))
+ try:
+ return xc.csched_domain_get(dominfo.getDomid())
+ except Exception, ex:
+ raise XendError(str(ex))
+
+ def domain_csched_set(self, domid, weight, cap):
+ """Set credit scheduler parameters for a domain.
+ """
+ dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+ if not dominfo:
+ raise XendInvalidDomain(str(domid))
+ try:
+ return xc.csched_domain_set(dominfo.getDomid(), weight, cap)
+ except Exception, ex:
+ raise XendError(str(ex))
+
def domain_maxmem_set(self, domid, mem):
"""Set the memory limit for a domain.
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py Fri May 26 13:41:49 2006 -0600
@@ -701,6 +701,16 @@ class XendDomainInfo:
log.debug("Storing VM details: %s", to_store)
self.writeVm(to_store)
+ self.setVmPermissions()
+
+
+ def setVmPermissions(self):
+ """Allow the guest domain to read its UUID. We don't allow it to
+ access any other entry, for security."""
+ xstransact.SetPermissions('%s/uuid' % self.vmpath,
+ { 'dom' : self.domid,
+ 'read' : True,
+ 'write' : False })
def storeDomDetails(self):
@@ -1535,6 +1545,10 @@ class XendDomainInfo:
self.configure_bootloader()
config = self.sxpr()
+
+ if self.infoIsSet('cpus') and len(self.info['cpus']) != 0:
+ config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y),
+ self.info['cpus'])])
if self.readVm(RESTART_IN_PROGRESS):
log.error('Xend failed during restart of domain %d. '
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/balloon.py Fri May 26 13:41:49 2006 -0600
@@ -32,6 +32,7 @@ BALLOON_OUT_SLACK = 1 # MiB. We need th
BALLOON_OUT_SLACK = 1 # MiB. We need this because the physinfo details are
# rounded.
RETRY_LIMIT = 10
+RETRY_LIMIT_INCR = 5
##
# The time to sleep between retries grows linearly, using this value (in
# seconds). When the system is lightly loaded, memory should be scrubbed and
@@ -118,7 +119,8 @@ def free(required):
retries = 0
sleep_time = SLEEP_TIME_GROWTH
last_new_alloc = None
- while retries < RETRY_LIMIT:
+ rlimit = RETRY_LIMIT
+ while retries < rlimit:
free_mem = xc.physinfo()['free_memory']
if free_mem >= need_mem:
@@ -127,7 +129,9 @@ def free(required):
return
if retries == 0:
- log.debug("Balloon: free %d; need %d.", free_mem, need_mem)
+ rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
+ log.debug("Balloon: free %d; need %d; retries: %d.",
+ free_mem, need_mem, rlimit)
if dom0_min_mem > 0:
dom0_alloc = get_dom0_current_alloc()
@@ -143,8 +147,9 @@ def free(required):
# Continue to retry, waiting for ballooning.
time.sleep(sleep_time)
+ if retries < 2 * RETRY_LIMIT:
+ sleep_time += SLEEP_TIME_GROWTH
retries += 1
- sleep_time += SLEEP_TIME_GROWTH
# Not enough memory; diagnose the problem.
if dom0_min_mem == 0:
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/server/SrvDomain.py Fri May 26 13:41:49 2006 -0600
@@ -129,6 +129,20 @@ class SrvDomain(SrvDir):
['latency', 'int'],
['extratime', 'int'],
['weight', 'int']])
+ val = fn(req.args, {'dom': self.dom.domid})
+ return val
+
+ def op_domain_csched_get(self, _, req):
+ fn = FormFn(self.xd.domain_csched_get,
+ [['dom', 'int']])
+ val = fn(req.args, {'dom': self.dom.domid})
+ return val
+
+
+ def op_domain_csched_set(self, _, req):
+ fn = FormFn(self.xd.domain_csched_set,
+ [['dom', 'int'],
+ ['weight', 'int']])
val = fn(req.args, {'dom': self.dom.domid})
return val
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/xenstore/xstransact.py Fri May 26 13:41:49 2006 -0600
@@ -221,6 +221,34 @@ class xstransact:
xshandle().mkdir(self.transaction, self.prependPath(key))
+ def get_permissions(self, *args):
+ """If no arguments are given, return the permissions at this
+ transaction's path. If one argument is given, treat that argument as
+ a subpath to this transaction's path, and return the permissions at
+ that path. Otherwise, treat each argument as a subpath to this
+ transaction's path, and return a list composed of the permissions at
+ each of those instead.
+ """
+ if len(args) == 0:
+ return xshandle().get_permissions(self.transaction, self.path)
+ if len(args) == 1:
+ return self._get_permissions(args[0])
+ ret = []
+ for key in args:
+ ret.append(self._get_permissions(key))
+ return ret
+
+
+ def _get_permissions(self, key):
+ path = self.prependPath(key)
+ try:
+ return xshandle().get_permissions(self.transaction, path)
+ except RuntimeError, ex:
+ raise RuntimeError(ex.args[0],
+ '%s, while getting permissions from %s' %
+ (ex.args[1], path))
+
+
def set_permissions(self, *args):
if len(args) == 0:
raise TypeError
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xm/main.py Fri May 26 13:41:49 2006 -0600
@@ -99,6 +99,7 @@ sched_sedf_help = "sched-sedf [DOM] [OPT
specifies another way of setting a domain's\n\
cpu period/slice."
+csched_help = "csched Set or get credit scheduler parameters"
block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode>
[BackDomId] Create a new virtual block device"""
block_detach_help = """block-detach <DomId> <DevId> Destroy a domain's
virtual block device,
@@ -174,6 +175,7 @@ host_commands = [
]
scheduler_commands = [
+ "csched",
"sched-bvt",
"sched-bvt-ctxallow",
"sched-sedf",
@@ -735,6 +737,48 @@ def xm_sched_sedf(args):
else:
print_sedf(sedf_info)
+def xm_csched(args):
+ usage_msg = """Csched: Set or get credit scheduler parameters
+ Usage:
+
+ csched -d domain [-w weight] [-c cap]
+ """
+ try:
+ opts, args = getopt.getopt(args[0:], "d:w:c:",
+ ["domain=", "weight=", "cap="])
+ except getopt.GetoptError:
+ # print help information and exit:
+ print usage_msg
+ sys.exit(1)
+
+ domain = None
+ weight = None
+ cap = None
+
+ for o, a in opts:
+ if o == "-d":
+ domain = a
+ elif o == "-w":
+ weight = int(a)
+ elif o == "-c":
+ cap = int(a)
+
+ if domain is None:
+ # place holder for system-wide scheduler parameters
+ print usage_msg
+ sys.exit(1)
+
+ if weight is None and cap is None:
+ print server.xend.domain.csched_get(domain)
+ else:
+ if weight is None:
+ weight = int(0)
+ if cap is None:
+ cap = int(~0)
+
+ err = server.xend.domain.csched_set(domain, weight, cap)
+ if err != 0:
+ print err
def xm_info(args):
arg_check(args, "info", 0)
@@ -1032,6 +1076,7 @@ commands = {
"sched-bvt": xm_sched_bvt,
"sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
"sched-sedf": xm_sched_sedf,
+ "csched": xm_csched,
# block
"block-attach": xm_block_attach,
"block-detach": xm_block_detach,
diff -r 9d52a66c7499 -r c073ebdbde8c tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/tests/test_x86_emulator.c Fri May 26 13:41:49 2006 -0600
@@ -17,7 +17,8 @@ static int read_any(
static int read_any(
unsigned long addr,
unsigned long *val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
switch ( bytes )
{
@@ -32,7 +33,8 @@ static int write_any(
static int write_any(
unsigned long addr,
unsigned long val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
switch ( bytes )
{
@@ -48,7 +50,8 @@ static int cmpxchg_any(
unsigned long addr,
unsigned long old,
unsigned long new,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
switch ( bytes )
{
@@ -65,34 +68,38 @@ static int cmpxchg8b_any(
unsigned long old_lo,
unsigned long old_hi,
unsigned long new_lo,
- unsigned long new_hi)
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt)
{
((unsigned long *)addr)[0] = new_lo;
((unsigned long *)addr)[1] = new_hi;
return X86EMUL_CONTINUE;
}
-static struct x86_mem_emulator emulops = {
+static struct x86_emulate_ops emulops = {
read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any
};
int main(int argc, char **argv)
{
+ struct x86_emulate_ctxt ctxt;
struct cpu_user_regs regs;
char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
unsigned int res = 0x7FFFFFFF;
u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
- unsigned long cr2;
int rc;
+
+ ctxt.regs = &regs;
+ ctxt.mode = X86EMUL_MODE_PROT32;
printf("%-40s", "Testing addl %%ecx,(%%eax)...");
instr[0] = 0x01; instr[1] = 0x08;
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- cr2 = (unsigned long)&res;
+ ctxt.cr2 = (unsigned long)&res;
res = 0x7FFFFFFF;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x92345677) ||
(regs.eflags != 0xa94) ||
@@ -109,8 +116,8 @@ int main(int argc, char **argv)
#else
regs.ecx = 0x12345678UL;
#endif
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x92345677) ||
(regs.ecx != 0x8000000FUL) ||
@@ -124,8 +131,8 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.eax = 0x92345677UL;
regs.ecx = 0xAA;
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x923456AA) ||
(regs.eflags != 0x244) ||
@@ -140,8 +147,8 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.eax = 0xAABBCC77UL;
regs.ecx = 0xFF;
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x923456AA) ||
((regs.eflags&0x240) != 0x200) ||
@@ -156,8 +163,8 @@ int main(int argc, char **argv)
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x12345678) ||
(regs.eflags != 0x200) ||
@@ -173,8 +180,8 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.eax = 0x923456AAUL;
regs.ecx = 0xDDEEFF00L;
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0xDDEEFF00) ||
(regs.eflags != 0x244) ||
@@ -192,8 +199,8 @@ int main(int argc, char **argv)
regs.esi = (unsigned long)&res + 0;
regs.edi = (unsigned long)&res + 2;
regs.error_code = 0; /* read fault */
- cr2 = regs.esi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = regs.esi;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x44554455) ||
(regs.eflags != 0x200) ||
@@ -210,8 +217,8 @@ int main(int argc, char **argv)
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)&res;
- cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = regs.edi;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x2233445D) ||
((regs.eflags&0x201) != 0x201) ||
@@ -228,8 +235,8 @@ int main(int argc, char **argv)
regs.ecx = 0xCCCCFFFF;
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)cmpxchg8b_res;
- cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = regs.edi;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(cmpxchg8b_res[0] != 0x9999AAAA) ||
(cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -242,8 +249,8 @@ int main(int argc, char **argv)
instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)cmpxchg8b_res;
- cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = regs.edi;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(cmpxchg8b_res[0] != 0x9999AAAA) ||
(cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -258,9 +265,9 @@ int main(int argc, char **argv)
instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- cr2 = (unsigned long)&res;
+ ctxt.cr2 = (unsigned long)&res;
res = 0x82;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x82) ||
(regs.ecx != 0xFFFFFF82) ||
@@ -273,9 +280,9 @@ int main(int argc, char **argv)
instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- cr2 = (unsigned long)&res;
+ ctxt.cr2 = (unsigned long)&res;
res = 0x1234aa82;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x1234aa82) ||
(regs.ecx != 0xaa82) ||
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/Makefile
--- a/tools/xenstore/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/Makefile Fri May 26 13:41:49 2006 -0600
@@ -27,6 +27,12 @@ CLIENTS += xenstore-write
CLIENTS += xenstore-write
CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS))
+XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+
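+# Add the OS-specific backend: $(OS) selects e.g. XENSTORED_Linux below.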
+XENSTORED_Linux = xenstored_linux.o
+
+XENSTORED_OBJS += $(XENSTORED_$(OS))
+
.PHONY: all
all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls
@@ -36,7 +42,7 @@ test_interleaved_transactions: test_inte
.PHONY: testcode
testcode: xs_test xenstored_test xs_random
-xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+xenstored: $(XENSTORED_OBJS)
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
$(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/xenstored_core.c Fri May 26 13:41:49 2006 -0600
@@ -451,6 +451,11 @@ static struct node *read_node(struct con
static bool write_node(struct connection *conn, const struct node *node)
{
+ /*
+ * conn will be null when this is called from manual_node.
+ * tdb_context copes with this.
+ */
+
TDB_DATA key, data;
void *p;
@@ -478,7 +483,7 @@ static bool write_node(struct connection
/* TDB should set errno, but doesn't even set ecode AFAICT. */
if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
- corrupt(conn, "Write of %s = %s failed", key, data);
+ corrupt(conn, "Write of %s failed", key.dptr);
goto error;
}
return true;
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/xenstored_core.h Fri May 26 13:41:49 2006 -0600
@@ -19,6 +19,8 @@
#ifndef _XENSTORED_CORE_H
#define _XENSTORED_CORE_H
+
+#include <xenctrl.h>
#include <sys/types.h>
#include <dirent.h>
@@ -163,6 +165,12 @@ void trace(const char *fmt, ...);
extern int event_fd;
+/* Map the kernel's xenstore page. */
+void *xenbus_map(void);
+
+/* Return the event channel used by xenbus. */
+evtchn_port_t xenbus_evtchn(void);
+
#endif /* _XENSTORED_CORE_H */
/*
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/xenstored_domain.c Fri May 26 13:41:49 2006 -0600
@@ -33,12 +33,11 @@
#include "talloc.h"
#include "xenstored_core.h"
#include "xenstored_domain.h"
-#include "xenstored_proc.h"
#include "xenstored_watch.h"
#include "xenstored_test.h"
#include <xenctrl.h>
-#include <xen/linux/evtchn.h>
+#include <xen/sys/evtchn.h>
static int *xc_handle;
static evtchn_port_t virq_port;
@@ -476,44 +475,24 @@ void restore_existing_connections(void)
static int dom0_init(void)
{
- int rc, fd;
- evtchn_port_t port;
- char str[20];
- struct domain *dom0;
-
- fd = open(XENSTORED_PROC_PORT, O_RDONLY);
- if (fd == -1)
+ evtchn_port_t port;
+ struct domain *dom0;
+
+ port = xenbus_evtchn();
+ if (port == -1)
return -1;
- rc = read(fd, str, sizeof(str));
- if (rc == -1)
- goto outfd;
- str[rc] = '\0';
- port = strtoul(str, NULL, 0);
-
- close(fd);
-
dom0 = new_domain(NULL, 0, port);
- fd = open(XENSTORED_PROC_KVA, O_RDWR);
- if (fd == -1)
+ dom0->interface = xenbus_map();
+ if (dom0->interface == NULL)
return -1;
- dom0->interface = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
- MAP_SHARED, fd, 0);
- if (dom0->interface == MAP_FAILED)
- goto outfd;
-
- close(fd);
-
talloc_steal(dom0->conn, dom0);
evtchn_notify(dom0->port);
return 0;
-outfd:
- close(fd);
- return -1;
}
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/domain_build.c Fri May 26 13:41:49 2006 -0600
@@ -367,7 +367,10 @@ int construct_dom0(struct domain *d,
if ( (1UL << order) > nr_pages )
panic("Domain 0 allocation is too small for kernel image.\n");
- /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
+ /*
+ * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1
+ * mapping covers the allocation.
+ */
if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
panic("Not enough RAM for domain 0 allocation.\n");
alloc_spfn = page_to_mfn(page);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/hvm.c Fri May 26 13:41:49 2006 -0600
@@ -185,8 +185,9 @@ void hvm_setup_platform(struct domain* d
void hvm_setup_platform(struct domain* d)
{
struct hvm_domain *platform;
-
- if ( !hvm_guest(current) || (current->vcpu_id != 0) )
+ struct vcpu *v=current;
+
+ if ( !hvm_guest(v) || (v->vcpu_id != 0) )
return;
if ( shadow_direct_map_init(d) == 0 )
@@ -208,7 +209,8 @@ void hvm_setup_platform(struct domain* d
hvm_vioapic_init(d);
}
- pit_init(&platform->vpit, current);
+ init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, v->processor);
+ pit_init(v, cpu_khz);
}
void pic_irq_request(void *data, int level)
@@ -238,6 +240,14 @@ void hvm_pic_assist(struct vcpu *v)
} while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
do_pic_irqs(pic, irqs);
}
+}
+
+u64 hvm_get_guest_time(struct vcpu *v)
+{
+ u64 host_tsc;
+
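+ /* Guest time is the host TSC plus the vcpu's cached TSC offset. */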
+ rdtscll(host_tsc);
+ return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}
int cpu_get_interrupt(struct vcpu *v, int *type)
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/i8254.c Fri May 26 13:41:49 2006 -0600
@@ -22,11 +22,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-/* Edwin Zhai <edwin.zhai@xxxxxxxxx>
+/* Edwin Zhai <edwin.zhai@xxxxxxxxx>, Eddie Dong <eddie.dong@xxxxxxxxx>
* Ported to xen:
- * use actimer for intr generation;
+ * Add a new layer of periodic time on top of PIT;
* move speaker io access to hypervisor;
- * use new method for counter/intrs calculation
*/
#include <xen/config.h>
@@ -42,184 +41,117 @@
#include <asm/hvm/vpit.h>
#include <asm/current.h>
-/*#define DEBUG_PIT*/
+/* Enabling DEBUG_PIT may cause guest calibration inaccuracy */
+/* #define DEBUG_PIT */
#define RW_STATE_LSB 1
#define RW_STATE_MSB 2
#define RW_STATE_WORD0 3
#define RW_STATE_WORD1 4
-#ifndef NSEC_PER_SEC
-#define NSEC_PER_SEC (1000000000ULL)
-#endif
-
-#ifndef TIMER_SLOP
-#define TIMER_SLOP (50*1000) /* ns */
-#endif
-
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time);
-
-s_time_t hvm_get_clock(void)
-{
- /* TODO: add pause/unpause support */
- return NOW();
+#define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency)
+static int handle_pit_io(ioreq_t *p);
+static int handle_speaker_io(ioreq_t *p);
+
+/* compute with 96 bit intermediate result: (a*b)/c */
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+ union {
+ uint64_t ll;
+ struct {
+#ifdef WORDS_BIGENDIAN
+ uint32_t high, low;
+#else
+ uint32_t low, high;
+#endif
+ } l;
+ } u, res;
+ uint64_t rl, rh;
+
+ u.ll = a;
+ rl = (uint64_t)u.l.low * (uint64_t)b;
+ rh = (uint64_t)u.l.high * (uint64_t)b;
+ rh += (rl >> 32);
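+ /* (rh << 32) | (rl & 0xffffffff) now holds the full 96-bit product a*b. */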
+ res.l.high = rh / c;
+ res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+ return res.ll;
+}
+
+/*
+ * Get the current guest time, in guest TSC units.
+ */
+int64_t hvm_get_clock(struct vcpu *v)
+{
+ uint64_t gtsc;
+ gtsc = hvm_get_guest_time(v);
+ return gtsc;
}
static int pit_get_count(PITChannelState *s)
{
- u64 d;
- u64 counter;
-
- d = hvm_get_clock() - s->count_load_time;
+ uint64_t d;
+ int counter;
+
+ d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu));
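+ /* d: PIT clock ticks elapsed since the counter was last loaded. */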
switch(s->mode) {
case 0:
case 1:
case 4:
case 5:
- counter = (s->period - d) & 0xffff;
+ counter = (s->count - d) & 0xffff;
break;
case 3:
/* XXX: may be incorrect for odd counts */
- counter = s->period - ((2 * d) % s->period);
+ counter = s->count - ((2 * d) % s->count);
break;
default:
- /* mod 2 counter handle */
- d = hvm_get_clock() - s->hvm_time->count_point;
- d += s->hvm_time->count_advance;
- counter = s->period - (d % s->period);
- break;
- }
- /* change from ns to pit counter */
- counter = DIV_ROUND( (counter * PIT_FREQ), NSEC_PER_SEC);
+ counter = s->count - (d % s->count);
+ break;
+ }
return counter;
}
/* get pit output bit */
-static int pit_get_out1(PITChannelState *s, s64 current_time)
-{
- u64 d;
+static int pit_get_out1(PITChannelState *s, int64_t current_time)
+{
+ uint64_t d;
int out;
- d = current_time - s->count_load_time;
+ d = muldiv64(current_time - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu));
switch(s->mode) {
default:
case 0:
- out = (d >= s->period);
+ out = (d >= s->count);
break;
case 1:
- out = (d < s->period);
+ out = (d < s->count);
break;
case 2:
- /* mod2 out is no meaning, since intr are generated in background */
- if ((d % s->period) == 0 && d != 0)
+ if ((d % s->count) == 0 && d != 0)
out = 1;
else
out = 0;
break;
case 3:
- out = (d % s->period) < ((s->period + 1) >> 1);
+ out = (d % s->count) < ((s->count + 1) >> 1);
break;
case 4:
case 5:
- out = (d == s->period);
+ out = (d == s->count);
break;
}
return out;
}
-int pit_get_out(hvm_virpit *pit, int channel, s64 current_time)
+int pit_get_out(PITState *pit, int channel, int64_t current_time)
{
PITChannelState *s = &pit->channels[channel];
return pit_get_out1(s, current_time);
}
-static __inline__ s64 missed_ticks(PITChannelState *s, s64 current_time)
-{
- struct hvm_time_info *hvm_time = s->hvm_time;
- struct domain *d = (void *) s -
- offsetof(struct domain, arch.hvm_domain.vpit.channels[0]);
-
- /* ticks from current time(expected time) to NOW */
- int missed_ticks;
- /* current_time is expected time for next intr, check if it's true
- * (actimer has a TIMER_SLOP in advance)
- */
- s64 missed_time = hvm_get_clock() + TIMER_SLOP - current_time;
-
- if (missed_time >= 0) {
- missed_ticks = missed_time/(s_time_t)s->period + 1;
- if (test_bit(_DOMF_debugging, &d->domain_flags)) {
- hvm_time->pending_intr_nr++;
- } else {
- hvm_time->pending_intr_nr += missed_ticks;
- }
- s->next_transition_time = current_time + (missed_ticks ) * s->period;
- }
-
- return s->next_transition_time;
-}
-
-/* only rearm the actimer when return value > 0
- * -2: init state
- * -1: the mode has expired
- * 0: current VCPU is not running
- * >0: the next fired time
- */
-s64 pit_get_next_transition_time(PITChannelState *s,
- s64 current_time)
-{
- s64 d, next_time, base;
- int period2;
- struct hvm_time_info *hvm_time = s->hvm_time;
-
- d = current_time - s->count_load_time;
- switch(s->mode) {
- default:
- case 0:
- case 1:
- if (d < s->period)
- next_time = s->period;
- else
- return -1;
- break;
- case 2:
- next_time = missed_ticks(s, current_time);
- if ( !test_bit(_VCPUF_running, &(hvm_time->vcpu->vcpu_flags)) )
- return 0;
- break;
- case 3:
- base = (d / s->period) * s->period;
- period2 = ((s->period + 1) >> 1);
- if ((d - base) < period2)
- next_time = base + period2;
- else
- next_time = base + s->period;
- break;
- case 4:
- case 5:
- if (d < s->period)
- next_time = s->period;
- else if (d == s->period)
- next_time = s->period + 1;
- else
- return -1;
- break;
- case 0xff:
- return -2; /* for init state */
- break;
- }
- /* XXX: better solution: use a clock at PIT_FREQ Hz */
- if (next_time <= current_time){
-#ifdef DEBUG_PIT
- printk("HVM_PIT:next_time <= current_time. next=0x%llx,
current=0x%llx!\n",next_time, current_time);
-#endif
- next_time = current_time + 1;
- }
- return next_time;
-}
-
/* val must be 0 or 1 */
-void pit_set_gate(hvm_virpit *pit, int channel, int val)
+void pit_set_gate(PITState *pit, int channel, int val)
{
PITChannelState *s = &pit->channels[channel];
@@ -233,16 +165,16 @@ void pit_set_gate(hvm_virpit *pit, int c
case 5:
if (s->gate < val) {
/* restart counting on rising edge */
- s->count_load_time = hvm_get_clock();
- pit_irq_timer_update(s, s->count_load_time);
+ s->count_load_time = hvm_get_clock(s->vcpu);
+// pit_irq_timer_update(s, s->count_load_time);
}
break;
case 2:
case 3:
if (s->gate < val) {
/* restart counting on rising edge */
- s->count_load_time = hvm_get_clock();
- pit_irq_timer_update(s, s->count_load_time);
+ s->count_load_time = hvm_get_clock(s->vcpu);
+// pit_irq_timer_update(s, s->count_load_time);
}
/* XXX: disable/enable counting */
break;
@@ -250,7 +182,7 @@ void pit_set_gate(hvm_virpit *pit, int c
s->gate = val;
}
-int pit_get_gate(hvm_virpit *pit, int channel)
+int pit_get_gate(PITState *pit, int channel)
{
PITChannelState *s = &pit->channels[channel];
return s->gate;
@@ -258,37 +190,37 @@ int pit_get_gate(hvm_virpit *pit, int ch
static inline void pit_load_count(PITChannelState *s, int val)
{
+ u32 period;
if (val == 0)
val = 0x10000;
-
- s->count_load_time = hvm_get_clock();
+ s->count_load_time = hvm_get_clock(s->vcpu);
s->count = val;
- s->period = DIV_ROUND(((s->count) * NSEC_PER_SEC), PIT_FREQ);
+ period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
#ifdef DEBUG_PIT
- printk("HVM_PIT: pit-load-counter, count=0x%x,period=0x%u us,mode=%d,
load_time=%lld\n",
+ printk("HVM_PIT: pit-load-counter(%p), count=0x%x, period=%uns mode=%d,
load_time=%lld\n",
+ s,
val,
- s->period / 1000,
+ period,
s->mode,
- s->count_load_time);
+ (long long)s->count_load_time);
#endif
- if (s->mode == HVM_PIT_ACCEL_MODE) {
- if (!s->hvm_time) {
- printk("HVM_PIT:guest should only set mod 2 on channel 0!\n");
- return;
- }
- s->hvm_time->period_cycles = (u64)s->period * cpu_khz / 1000000L;
- s->hvm_time->first_injected = 0;
-
- if (s->period < 900000) { /* < 0.9 ms */
- printk("HVM_PIT: guest programmed too small an count: %x\n",
- s->count);
- s->period = 1000000;
- }
- }
-
- pit_irq_timer_update(s, s->count_load_time);
+ switch (s->mode) {
+ case 2:
+ /* create periodic time */
+ s->pt = create_periodic_time (s->vcpu, period, 0, 0);
+ break;
+ case 1:
+ /* create one shot time */
+ s->pt = create_periodic_time (s->vcpu, period, 0, 1);
+#ifdef DEBUG_PIT
+ printk("HVM_PIT: create one shot time.\n");
+#endif
+ break;
+ default:
+ break;
+ }
}
/* if already latched, do not latch again */
@@ -300,9 +232,9 @@ static void pit_latch_count(PITChannelSt
}
}
-static void pit_ioport_write(void *opaque, u32 addr, u32 val)
-{
- hvm_virpit *pit = opaque;
+static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ PITState *pit = opaque;
int channel, access;
PITChannelState *s;
val &= 0xff;
@@ -321,7 +253,7 @@ static void pit_ioport_write(void *opaqu
if (!(val & 0x10) && !s->status_latched) {
/* status latch */
/* XXX: add BCD and null count */
- s->status = (pit_get_out1(s, hvm_get_clock()) << 7) |
+ s->status = (pit_get_out1(s, hvm_get_clock(s->vcpu)) << 7) |
(s->rw_mode << 4) |
(s->mode << 1) |
s->bcd;
@@ -366,9 +298,9 @@ static void pit_ioport_write(void *opaqu
}
}
-static u32 pit_ioport_read(void *opaque, u32 addr)
-{
- hvm_virpit *pit = opaque;
+static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
+{
+ PITState *pit = opaque;
int ret, count;
PITChannelState *s;
@@ -419,84 +351,51 @@ static u32 pit_ioport_read(void *opaque,
return ret;
}
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time)
-{
- s64 expire_time;
- int irq_level;
- struct vcpu *v = current;
- struct hvm_virpic *pic= &v->domain->arch.hvm_domain.vpic;
-
- if (!s->hvm_time || s->mode == 0xff)
- return;
-
- expire_time = pit_get_next_transition_time(s, current_time);
- /* not generate intr by direct pic_set_irq in mod 2
- * XXX:mod 3 should be same as mod 2
- */
- if (s->mode != HVM_PIT_ACCEL_MODE) {
- irq_level = pit_get_out1(s, current_time);
- pic_set_irq(pic, s->irq, irq_level);
- s->next_transition_time = expire_time;
-#ifdef DEBUG_PIT
- printk("HVM_PIT:irq_level=%d next_delay=%l ns\n",
- irq_level,
- (expire_time - current_time));
-#endif
- }
-
- if (expire_time > 0)
- set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-
-}
-
-static void pit_irq_timer(void *data)
-{
- PITChannelState *s = data;
-
- pit_irq_timer_update(s, s->next_transition_time);
-}
-
static void pit_reset(void *opaque)
{
- hvm_virpit *pit = opaque;
+ PITState *pit = opaque;
PITChannelState *s;
int i;
for(i = 0;i < 3; i++) {
s = &pit->channels[i];
+ if ( s->pt ) {
+ destroy_periodic_time(s->pt);
+ s->pt = NULL;
+ }
s->mode = 0xff; /* the init mode */
s->gate = (i != 2);
pit_load_count(s, 0);
}
}
-/* hvm_io_assist light-weight version, specific to PIT DM */
-static void resume_pit_io(ioreq_t *p)
-{
- struct cpu_user_regs *regs = guest_cpu_user_regs();
- unsigned long old_eax = regs->eax;
- p->state = STATE_INVALID;
-
- switch(p->size) {
- case 1:
- regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
- break;
- case 2:
- regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
- break;
- case 4:
- regs->eax = (p->u.data & 0xffffffff);
- break;
- default:
- BUG();
- }
+void pit_init(struct vcpu *v, unsigned long cpu_khz)
+{
+ PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit;
+ PITChannelState *s;
+
+ s = &pit->channels[0];
+ /* the timer 0 is connected to an IRQ */
+ s->vcpu = v;
+ s++; s->vcpu = v;
+ s++; s->vcpu = v;
+
+ register_portio_handler(PIT_BASE, 4, handle_pit_io);
+ /* register the speaker port */
+ register_portio_handler(0x61, 1, handle_speaker_io);
+ ticks_per_sec(v) = cpu_khz * (int64_t)1000;
+#ifdef DEBUG_PIT
+ printk("HVM_PIT: guest frequency =%lld\n", (long long)ticks_per_sec(v));
+#endif
+ pit_reset(pit);
+ return;
}
/* the intercept action for PIT DM retval:0--not handled; 1--handled */
-int handle_pit_io(ioreq_t *p)
+static int handle_pit_io(ioreq_t *p)
{
struct vcpu *v = current;
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+ struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
if (p->size != 1 ||
p->pdata_valid ||
@@ -508,18 +407,18 @@ int handle_pit_io(ioreq_t *p)
if (p->dir == 0) {/* write */
pit_ioport_write(vpit, p->addr, p->u.data);
} else if (p->dir == 1) { /* read */
- p->u.data = pit_ioport_read(vpit, p->addr);
- resume_pit_io(p);
- }
-
- /* always return 1, since PIT sit in HV now */
+ if ( (p->addr & 3) != 3 ) {
+ p->u.data = pit_ioport_read(vpit, p->addr);
+ } else {
+ printk("HVM_PIT: read A1:A0=3!\n");
+ }
+ }
return 1;
}
static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
- hvm_virpit *pit = opaque;
- val &= 0xff;
+ PITState *pit = opaque;
pit->speaker_data_on = (val >> 1) & 1;
pit_set_gate(pit, 2, val & 1);
}
@@ -527,18 +426,18 @@ static uint32_t speaker_ioport_read(void
static uint32_t speaker_ioport_read(void *opaque, uint32_t addr)
{
int out;
- hvm_virpit *pit = opaque;
- out = pit_get_out(pit, 2, hvm_get_clock());
+ PITState *pit = opaque;
+ out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu));
pit->dummy_refresh_clock ^= 1;
return (pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) |
(pit->dummy_refresh_clock << 4);
}
-int handle_speaker_io(ioreq_t *p)
+static int handle_speaker_io(ioreq_t *p)
{
struct vcpu *v = current;
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+ struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
if (p->size != 1 ||
p->pdata_valid ||
@@ -551,45 +450,7 @@ int handle_speaker_io(ioreq_t *p)
speaker_ioport_write(vpit, p->addr, p->u.data);
} else if (p->dir == 1) {/* read */
p->u.data = speaker_ioport_read(vpit, p->addr);
- resume_pit_io(p);
}
return 1;
}
-
-/* pick up missed timer ticks at deactive time */
-void pickup_deactive_ticks(struct hvm_virpit *vpit)
-{
- s64 next_time;
- PITChannelState *s = &(vpit->channels[0]);
- if ( !active_timer(&(vpit->time_info.pit_timer)) ) {
- next_time = pit_get_next_transition_time(s, s->next_transition_time);
- if (next_time >= 0)
- set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
- }
-}
-
-void pit_init(struct hvm_virpit *pit, struct vcpu *v)
-{
- PITChannelState *s;
- struct hvm_time_info *hvm_time;
-
- s = &pit->channels[0];
- /* the timer 0 is connected to an IRQ */
- s->irq = 0;
- /* channel 0 need access the related time info for intr injection */
- hvm_time = s->hvm_time = &pit->time_info;
- hvm_time->vcpu = v;
-
- init_timer(&(hvm_time->pit_timer), pit_irq_timer, s, v->processor);
-
- register_portio_handler(PIT_BASE, 4, handle_pit_io);
-
- /* register the speaker port */
- register_portio_handler(0x61, 1, handle_speaker_io);
-
- pit_reset(pit);
-
- return;
-
-}
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/intercept.c Fri May 26 13:41:49 2006 -0600
@@ -214,6 +214,88 @@ void hlt_timer_fn(void *data)
evtchn_set_pending(v, iopacket_port(v));
}
+static __inline__ void missed_ticks(struct periodic_time *pt)
+{
+ int missed_ticks;
+
+ missed_ticks = (NOW() - pt->scheduled)/(s_time_t) pt->period;
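+ /*
+ * Periods elapsed since the scheduled tick; negative when the deadline
+ * has not passed yet. The post-increment makes it count the scheduled
+ * tick itself, so at least one interrupt becomes pending.
+ */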
+ if ( missed_ticks++ >= 0 ) {
+ if ( missed_ticks > 1000 ) {
+ /* TODO: Adjust guest time together */
+ pt->pending_intr_nr++;
+ }
+ else {
+ pt->pending_intr_nr += missed_ticks;
+ }
+ pt->scheduled += missed_ticks * pt->period;
+ }
+}
+
+/* hook function for the platform periodic time */
+void pt_timer_fn(void *data)
+{
+ struct vcpu *v = data;
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+ /* pick up missed timer tick */
+ missed_ticks(pt);
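+ /* Only rearm while the vcpu runs; resume paths call pickup_deactive_ticks(). */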
+ if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
+ set_timer(&pt->timer, pt->scheduled);
+ }
+}
+
+/* pick up missed timer ticks at deactive time */
+void pickup_deactive_ticks(struct periodic_time *pt)
+{
+ if ( !active_timer(&(pt->timer)) ) {
+ missed_ticks(pt);
+ set_timer(&pt->timer, pt->scheduled);
+ }
+}
+
+/*
+ * period: fire frequency in ns.
+ */
+struct periodic_time * create_periodic_time(
+ struct vcpu *v,
+ u32 period,
+ char irq,
+ char one_shot)
+{
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+ if ( pt->enabled ) {
+ if ( v->vcpu_id != 0 ) {
+ printk("HVM_PIT: start 2nd periodic time on non BSP!\n");
+ }
+ stop_timer (&pt->timer);
+ pt->enabled = 0;
+ }
+ pt->pending_intr_nr = 0;
+ pt->first_injected = 0;
+ if (period < 900000) { /* < 0.9 ms */
+ printk("HVM_PlatformTime: program too small period %u\n",period);
+ period = 900000; /* force to 0.9ms */
+ }
+ pt->period = period;
+ pt->irq = irq;
+ pt->period_cycles = (u64)period * cpu_khz / 1000000L;
+ pt->one_shot = one_shot;
+ if ( one_shot ) {
+ printk("HVM_PL: No support for one shot platform time yet\n");
+ }
+ pt->scheduled = NOW() + period;
+ set_timer (&pt->timer,pt->scheduled);
+ pt->enabled = 1;
+ return pt;
+}
+
+void destroy_periodic_time(struct periodic_time *pt)
+{
+ if ( pt->enabled ) {
+ stop_timer(&pt->timer);
+ pt->enabled = 0;
+ }
+}
/*
* Local variables:
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c Fri May 26 13:41:49 2006 -0600
@@ -44,45 +44,33 @@
*/
#define BSP_CPU(v) (!(v->vcpu_id))
-u64 svm_get_guest_time(struct vcpu *v)
-{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
- u64 host_tsc;
-
- rdtscll(host_tsc);
- return host_tsc + time_info->cache_tsc_offset;
-}
-
void svm_set_guest_time(struct vcpu *v, u64 gtime)
{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
u64 host_tsc;
rdtscll(host_tsc);
- time_info->cache_tsc_offset = gtime - host_tsc;
- v->arch.hvm_svm.vmcb->tsc_offset = time_info->cache_tsc_offset;
+ v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+ v->arch.hvm_svm.vmcb->tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset;
}
static inline void
interrupt_post_injection(struct vcpu * v, int vector, int type)
{
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
if ( is_pit_irq(v, vector, type) ) {
- if ( !time_info->first_injected ) {
- time_info->pending_intr_nr = 0;
- time_info->last_pit_gtime = svm_get_guest_time(v);
- time_info->first_injected = 1;
+ if ( !pt->first_injected ) {
+ pt->pending_intr_nr = 0;
+ pt->last_plt_gtime = hvm_get_guest_time(v);
+ pt->scheduled = NOW() + pt->period;
+ set_timer(&pt->timer, pt->scheduled);
+ pt->first_injected = 1;
} else {
- time_info->pending_intr_nr--;
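+ /* Catch-up path: consume one pending tick and advance guest time by one period. */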
+ pt->pending_intr_nr--;
+ pt->last_plt_gtime += pt->period_cycles;
+ svm_set_guest_time(v, pt->last_plt_gtime);
}
- time_info->count_advance = 0;
- time_info->count_point = NOW();
-
- time_info->last_pit_gtime += time_info->period_cycles;
- svm_set_guest_time(v, time_info->last_pit_gtime);
}
switch(type)
@@ -121,8 +109,7 @@ asmlinkage void svm_intr_assist(void)
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
struct hvm_domain *plat=&v->domain->arch.hvm_domain;
- struct hvm_virpit *vpit = &plat->vpit;
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &plat->pl_time.periodic_tm;
struct hvm_virpic *pic= &plat->vpic;
int intr_type = VLAPIC_DELIV_MODE_EXT;
int intr_vector = -1;
@@ -174,9 +161,9 @@ asmlinkage void svm_intr_assist(void)
if ( cpu_has_pending_irq(v) ) {
intr_vector = cpu_get_interrupt(v, &intr_type);
}
- else if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
- pic_set_irq(pic, 0, 0);
- pic_set_irq(pic, 0, 1);
+ else if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+ pic_set_irq(pic, pt->irq, 0);
+ pic_set_irq(pic, pt->irq, 1);
intr_vector = cpu_get_interrupt(v, &intr_type);
}
}
@@ -190,7 +177,7 @@ asmlinkage void svm_intr_assist(void)
/* Re-injecting a PIT interrupt? */
if (re_injecting &&
is_pit_irq(v, intr_vector, intr_type)) {
- ++time_info->pending_intr_nr;
+ ++pt->pending_intr_nr;
}
/* let's inject this interrupt */
TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c Fri May 26 13:41:49 2006 -0600
@@ -51,13 +51,6 @@
#define SVM_EXTRA_DEBUG
-#ifdef TRACE_BUFFER
-static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
-#else
-#define TRACE_VMEXIT(index,value) ((void)0)
-#endif
-
/* Useful define */
#define MAX_INST_SIZE 15
@@ -672,12 +665,11 @@ static void arch_svm_do_launch(struct vc
static void svm_freeze_time(struct vcpu *v)
{
- struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
+ struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
- if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) {
- v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
- time_info->count_advance += (NOW() - time_info->count_point);
- stop_timer(&(time_info->pit_timer));
+ if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+ v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+ stop_timer(&(pt->timer));
}
}
@@ -754,7 +746,7 @@ static void svm_relinquish_guest_resourc
}
}
- kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+ kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
@@ -784,10 +776,12 @@ void arch_svm_do_resume(struct vcpu *v)
void svm_migrate_timers(struct vcpu *v)
{
- struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
-
- migrate_timer(&time_info->pit_timer, v->processor);
- migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor);
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+ if ( pt->enabled ) {
+ migrate_timer( &pt->timer, v->processor );
+ migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
+ }
if ( hvm_apic_support(v->domain) && VLAPIC( v ))
migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
}
@@ -816,7 +810,6 @@ static int svm_do_page_fault(unsigned lo
return 1;
handle_mmio(va, va);
- TRACE_VMEXIT(2,2);
return 1;
}
@@ -842,7 +835,6 @@ static int svm_do_page_fault(unsigned lo
return 1;
}
- TRACE_VMEXIT (2,2);
handle_mmio(va, gpa);
return 1;
@@ -854,8 +846,6 @@ static int svm_do_page_fault(unsigned lo
/* Let's make sure that the Guest TLB is flushed */
set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
}
-
- TRACE_VMEXIT (2,result);
return result;
}
@@ -1901,14 +1891,8 @@ static inline void svm_do_msr_access(str
regs->edx = 0;
switch (regs->ecx) {
case MSR_IA32_TIME_STAMP_COUNTER:
- {
- struct hvm_time_info *time_info;
-
- rdtscll(msr_content);
- time_info = &v->domain->arch.hvm_domain.vpit.time_info;
- msr_content += time_info->cache_tsc_offset;
+ msr_content = hvm_get_guest_time(v);
break;
- }
case MSR_IA32_SYSENTER_CS:
msr_content = vmcb->sysenter_cs;
break;
@@ -1975,7 +1959,7 @@ static inline void svm_vmexit_do_hlt(str
static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
{
struct vcpu *v = current;
- struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
+ struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
s_time_t next_pit = -1, next_wakeup;
__update_guest_eip(vmcb, 1);
@@ -1985,7 +1969,7 @@ static inline void svm_vmexit_do_hlt(str
return;
if ( !v->vcpu_id )
- next_pit = get_pit_scheduled(v, vpit);
+ next_pit = get_scheduled(v, pt->irq, pt);
next_wakeup = get_apictime_scheduled(v);
if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
next_wakeup = next_pit;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c Fri May 26 13:41:49 2006 -0600
@@ -442,19 +442,17 @@ void svm_do_resume(struct vcpu *v)
void svm_do_resume(struct vcpu *v)
{
struct domain *d = v->domain;
- struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit;
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm;
svm_stts(v);
/* pick up the elapsed PIT ticks and re-enable pit_timer */
- if ( time_info->first_injected ) {
- if ( v->domain->arch.hvm_domain.guest_time ) {
- svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
- time_info->count_point = NOW();
- v->domain->arch.hvm_domain.guest_time = 0;
+ if ( pt->enabled && pt->first_injected ) {
+ if ( v->arch.hvm_vcpu.guest_time ) {
+ svm_set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+ v->arch.hvm_vcpu.guest_time = 0;
}
- pickup_deactive_ticks(vpit);
+ pickup_deactive_ticks(pt);
}
if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/io.c Fri May 26 13:41:49 2006 -0600
@@ -49,45 +49,33 @@ void __set_tsc_offset(u64 offset)
#endif
}
-u64 get_guest_time(struct vcpu *v)
-{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
- u64 host_tsc;
-
- rdtscll(host_tsc);
- return host_tsc + time_info->cache_tsc_offset;
-}
-
void set_guest_time(struct vcpu *v, u64 gtime)
{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
u64 host_tsc;
rdtscll(host_tsc);
- time_info->cache_tsc_offset = gtime - host_tsc;
- __set_tsc_offset(time_info->cache_tsc_offset);
+ v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+ __set_tsc_offset(v->arch.hvm_vcpu.cache_tsc_offset);
}
static inline void
interrupt_post_injection(struct vcpu * v, int vector, int type)
{
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
if ( is_pit_irq(v, vector, type) ) {
- if ( !time_info->first_injected ) {
- time_info->pending_intr_nr = 0;
- time_info->last_pit_gtime = get_guest_time(v);
- time_info->first_injected = 1;
+ if ( !pt->first_injected ) {
+ pt->pending_intr_nr = 0;
+ pt->last_plt_gtime = hvm_get_guest_time(v);
+ pt->scheduled = NOW() + pt->period;
+ set_timer(&pt->timer, pt->scheduled);
+ pt->first_injected = 1;
} else {
- time_info->pending_intr_nr--;
- }
- time_info->count_advance = 0;
- time_info->count_point = NOW();
-
- time_info->last_pit_gtime += time_info->period_cycles;
- set_guest_time(v, time_info->last_pit_gtime);
+ pt->pending_intr_nr--;
+ pt->last_plt_gtime += pt->period_cycles;
+ set_guest_time(v, pt->last_plt_gtime);
+ }
}
switch(type)
@@ -151,7 +139,7 @@ asmlinkage void vmx_intr_assist(void)
unsigned long eflags;
struct vcpu *v = current;
struct hvm_domain *plat=&v->domain->arch.hvm_domain;
- struct hvm_time_info *time_info = &plat->vpit.time_info;
+ struct periodic_time *pt = &plat->pl_time.periodic_tm;
struct hvm_virpic *pic= &plat->vpic;
unsigned int idtv_info_field;
unsigned long inst_len;
@@ -160,9 +148,9 @@ asmlinkage void vmx_intr_assist(void)
if ( v->vcpu_id == 0 )
hvm_pic_assist(v);
- if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
- pic_set_irq(pic, 0, 0);
- pic_set_irq(pic, 0, 1);
+ if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+ pic_set_irq(pic, pt->irq, 0);
+ pic_set_irq(pic, pt->irq, 1);
}
has_ext_irq = cpu_has_pending_irq(v);
@@ -232,19 +220,17 @@ void vmx_do_resume(struct vcpu *v)
void vmx_do_resume(struct vcpu *v)
{
struct domain *d = v->domain;
- struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm;
vmx_stts();
/* pick up the elapsed PIT ticks and re-enable pit_timer */
- if ( time_info->first_injected ) {
- if ( v->domain->arch.hvm_domain.guest_time ) {
- time_info->count_point = NOW();
- set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
- v->domain->arch.hvm_domain.guest_time = 0;
- }
- pickup_deactive_ticks(vpit);
+ if ( pt->enabled && pt->first_injected ) {
+ if ( v->arch.hvm_vcpu.guest_time ) {
+ set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+ v->arch.hvm_vcpu.guest_time = 0;
+ }
+ pickup_deactive_ticks(pt);
}
if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c Fri May 26 13:41:49 2006 -0600
@@ -47,7 +47,7 @@
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
-static unsigned long trace_values[NR_CPUS][4];
+static unsigned long trace_values[NR_CPUS][5];
#define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
static void vmx_ctxt_switch_from(struct vcpu *v);
@@ -102,7 +102,7 @@ static void vmx_relinquish_guest_resourc
}
}
- kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+ kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
@@ -358,12 +358,11 @@ static inline int long_mode_do_msr_write
static void vmx_freeze_time(struct vcpu *v)
{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
+ struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
- if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) {
- v->domain->arch.hvm_domain.guest_time = get_guest_time(v);
- time_info->count_advance += (NOW() - time_info->count_point);
- stop_timer(&(time_info->pit_timer));
+ if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+ v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+ stop_timer(&(pt->timer));
}
}
@@ -393,10 +392,12 @@ int vmx_initialize_guest_resources(struc
void vmx_migrate_timers(struct vcpu *v)
{
- struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
-
- migrate_timer(&time_info->pit_timer, v->processor);
- migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+ if ( pt->enabled ) {
+ migrate_timer(&pt->timer, v->processor);
+ migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+ }
if ( hvm_apic_support(v->domain) && VLAPIC(v))
migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
}
@@ -1861,14 +1862,8 @@ static inline void vmx_do_msr_read(struc
(unsigned long)regs->edx);
switch (regs->ecx) {
case MSR_IA32_TIME_STAMP_COUNTER:
- {
- struct hvm_time_info *time_info;
-
- rdtscll(msr_content);
- time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
- msr_content += time_info->cache_tsc_offset;
- break;
- }
+ msr_content = hvm_get_guest_time(v);
+ break;
case MSR_IA32_SYSENTER_CS:
__vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
break;
@@ -1941,11 +1936,11 @@ void vmx_vmexit_do_hlt(void)
void vmx_vmexit_do_hlt(void)
{
struct vcpu *v=current;
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
s_time_t next_pit=-1,next_wakeup;
if ( !v->vcpu_id )
- next_pit = get_pit_scheduled(v,vpit);
+ next_pit = get_scheduled(v, pt->irq, pt);
next_wakeup = get_apictime_scheduled(v);
if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
next_wakeup = next_pit;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/mm.c Fri May 26 13:41:49 2006 -0600
@@ -260,9 +260,42 @@ void share_xen_page_with_privileged_gues
share_xen_page_with_guest(page, dom_xen, readonly);
}
+static void __write_ptbase(unsigned long mfn)
+{
+#ifdef CONFIG_X86_PAE
+ if ( mfn >= 0x100000 )
+ {
+ l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
+ struct vcpu *v = current;
+ unsigned long flags;
+
+ /* Protects against re-entry and against __pae_flush_pgd(). */
+ local_irq_save(flags);
+
+ /* Pick an unused low-memory L3 cache slot. */
+ v->arch.lowmem_l3tab_inuse ^= 1;
+ lowmem_l3tab = v->arch.lowmem_l3tab[v->arch.lowmem_l3tab_inuse];
+ v->arch.lowmem_l3tab_high_mfn[v->arch.lowmem_l3tab_inuse] = mfn;
+
+ /* Map the guest L3 table and copy to the chosen low-memory cache. */
+ highmem_l3tab = map_domain_page(mfn);
+ memcpy(lowmem_l3tab, highmem_l3tab, sizeof(v->arch.lowmem_l3tab));
+ unmap_domain_page(highmem_l3tab);
+
+ /* Install the low-memory L3 table in CR3. */
+ write_cr3(__pa(lowmem_l3tab));
+
+ local_irq_restore(flags);
+ return;
+ }
+#endif
+
+ write_cr3(mfn << PAGE_SHIFT);
+}
+
void write_ptbase(struct vcpu *v)
{
- write_cr3(pagetable_get_paddr(v->arch.monitor_table));
+ __write_ptbase(pagetable_get_pfn(v->arch.monitor_table));
}
void invalidate_shadow_ldt(struct vcpu *v)
@@ -401,6 +434,7 @@ static int get_page_and_type_from_pagenr
return 1;
}
+#ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
/*
* We allow root tables to map each other (a.k.a. linear page tables). It
* needs some special care with reference counts and access permissions:
@@ -456,6 +490,7 @@ get_linear_pagetable(
return 1;
}
+#endif /* !CONFIG_X86_PAE */
int
get_page_from_l1e(
@@ -564,10 +599,6 @@ get_page_from_l3e(
rc = get_page_and_type_from_pagenr(
l3e_get_pfn(l3e),
PGT_l2_page_table | vaddr, d);
-#if CONFIG_PAGING_LEVELS == 3
- if ( unlikely(!rc) )
- rc = get_linear_pagetable(l3e, pfn, d);
-#endif
return rc;
}
#endif /* 3 level */
@@ -773,6 +804,50 @@ static int create_pae_xen_mappings(l3_pg
return 1;
}
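+/* Argument block passed to __pae_flush_pgd() via on_selected_cpus(). */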
+struct pae_flush_pgd {
+ unsigned long l3tab_mfn;
+ unsigned int l3tab_idx;
+ l3_pgentry_t nl3e;
+};
+
+static void __pae_flush_pgd(void *data)
+{
+ struct pae_flush_pgd *args = data;
+ struct vcpu *v = this_cpu(curr_vcpu);
+ int i = v->arch.lowmem_l3tab_inuse;
+ intpte_t _ol3e, _nl3e, _pl3e;
+ l3_pgentry_t *l3tab_ptr;
+
+ ASSERT(!local_irq_is_enabled());
+
+ if ( v->arch.lowmem_l3tab_high_mfn[i] != args->l3tab_mfn )
+ return;
+
+ l3tab_ptr = &v->arch.lowmem_l3tab[i][args->l3tab_idx];
+
+ _ol3e = l3e_get_intpte(*l3tab_ptr);
+ _nl3e = l3e_get_intpte(args->nl3e);
+ _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
+ BUG_ON(_pl3e != _ol3e);
+}
+
+/* Flush a pgdir update into low-memory caches. */
+static void pae_flush_pgd(
+ unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
+{
+ struct domain *d = page_get_owner(mfn_to_page(mfn));
+ struct pae_flush_pgd args = {
+ .l3tab_mfn = mfn,
+ .l3tab_idx = idx,
+ .nl3e = nl3e };
+
+ /* If below 4GB then the pgdir is not shadowed in low memory. */
+ if ( mfn < 0x100000 )
+ return;
+
+ on_selected_cpus(d->domain_dirty_cpumask, __pae_flush_pgd, &args, 1, 1);
+}
+
static inline int l1_backptr(
unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
{
@@ -787,6 +862,7 @@ static inline int l1_backptr(
#elif CONFIG_X86_64
# define create_pae_xen_mappings(pl3e) (1)
+# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
static inline int l1_backptr(
unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
@@ -886,14 +962,6 @@ static int alloc_l3_table(struct page_in
ASSERT(!shadow_mode_refcounts(d));
-#ifdef CONFIG_X86_PAE
- if ( pfn >= 0x100000 )
- {
- MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
- return 0;
- }
-#endif
-
pl3e = map_domain_page(pfn);
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
{
@@ -1240,6 +1308,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
okay = create_pae_xen_mappings(pl3e);
BUG_ON(!okay);
+
+ pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
put_page_from_l3e(ol3e, pfn);
return 1;
@@ -3109,7 +3179,7 @@ void ptwr_flush(struct domain *d, const
if ( unlikely(d->arch.ptwr[which].vcpu != current) )
/* Don't use write_ptbase: it may switch to guest_user on x86/64! */
- write_cr3(pagetable_get_paddr(
+ __write_ptbase(pagetable_get_pfn(
d->arch.ptwr[which].vcpu->arch.guest_table));
else
TOGGLE_MODE();
@@ -3220,15 +3290,16 @@ static int ptwr_emulated_update(
/* Turn a sub-word access into a full-word access. */
if ( bytes != sizeof(paddr_t) )
{
- int rc;
- paddr_t full;
- unsigned int offset = addr & (sizeof(paddr_t)-1);
+ paddr_t full;
+ unsigned int offset = addr & (sizeof(paddr_t)-1);
/* Align address; read full word. */
addr &= ~(sizeof(paddr_t)-1);
- if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
- sizeof(paddr_t))) )
- return rc;
+ if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
+ {
+ propagate_page_fault(addr, 4); /* user mode, read fault */
+ return X86EMUL_PROPAGATE_FAULT;
+ }
/* Mask out bits provided by caller. */
full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
/* Shift the caller value and OR in the missing bits. */
@@ -3306,7 +3377,8 @@ static int ptwr_emulated_write(
static int ptwr_emulated_write(
unsigned long addr,
unsigned long val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
return ptwr_emulated_update(addr, 0, val, bytes, 0);
}
@@ -3315,7 +3387,8 @@ static int ptwr_emulated_cmpxchg(
unsigned long addr,
unsigned long old,
unsigned long new,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
return ptwr_emulated_update(addr, old, new, bytes, 1);
}
@@ -3325,7 +3398,8 @@ static int ptwr_emulated_cmpxchg8b(
unsigned long old,
unsigned long old_hi,
unsigned long new,
- unsigned long new_hi)
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt)
{
if ( CONFIG_PAGING_LEVELS == 2 )
return X86EMUL_UNHANDLEABLE;
@@ -3334,7 +3408,7 @@ static int ptwr_emulated_cmpxchg8b(
addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1);
}
-static struct x86_mem_emulator ptwr_mem_emulator = {
+static struct x86_emulate_ops ptwr_emulate_ops = {
.read_std = x86_emulate_read_std,
.write_std = x86_emulate_write_std,
.read_emulated = x86_emulate_read_std,
@@ -3353,6 +3427,7 @@ int ptwr_do_page_fault(struct domain *d,
l2_pgentry_t *pl2e, l2e;
int which, flags;
unsigned long l2_idx;
+ struct x86_emulate_ctxt emul_ctxt;
if ( unlikely(shadow_mode_enabled(d)) )
return 0;
@@ -3507,8 +3582,10 @@ int ptwr_do_page_fault(struct domain *d,
return EXCRET_fault_fixed;
emulate:
- if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
- &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
+ emul_ctxt.regs = guest_cpu_user_regs();
+ emul_ctxt.cr2 = addr;
+ emul_ctxt.mode = X86EMUL_MODE_HOST;
+ if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
return 0;
perfc_incrc(ptwr_emulations);
return EXCRET_fault_fixed;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/traps.c Fri May 26 13:41:49 2006 -0600
@@ -876,7 +876,7 @@ static int emulate_privileged_op(struct
PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
break;
}
- regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+ regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
break;
case 0x6e: /* OUTSB */
@@ -902,7 +902,7 @@ static int emulate_privileged_op(struct
outl_user((u32)data, (u16)regs->edx, v, regs);
break;
}
- regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+ regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
break;
}
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/x86_emulate.c Fri May 26 13:41:49 2006 -0600
@@ -363,12 +363,13 @@ do{ __asm__ __volatile__ (
#endif /* __i386__ */
/* Fetch next part of the instruction being emulated. */
-#define insn_fetch(_type, _size, _eip) \
-({ unsigned long _x; \
- if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
- goto done; \
- (_eip) += (_size); \
- (_type)_x; \
+#define insn_fetch(_type, _size, _eip) \
+({ unsigned long _x; \
+ rc = ops->read_std((unsigned long)(_eip), &_x, (_size), ctxt); \
+ if ( rc != 0 ) \
+ goto done; \
+ (_eip) += (_size); \
+ (_type)_x; \
})
/* Access/update address held in a register, based on addressing mode. */
@@ -426,12 +427,10 @@ decode_register(
return p;
}
-int
+int
x86_emulate_memop(
- struct cpu_user_regs *regs,
- unsigned long cr2,
- struct x86_mem_emulator *ops,
- int mode)
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
{
uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
@@ -439,9 +438,11 @@ x86_emulate_memop(
unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
int rc = 0;
struct operand src, dst;
+ unsigned long cr2 = ctxt->cr2;
+ int mode = ctxt->mode;
/* Shadow copy of register state. Committed on successful emulation. */
- struct cpu_user_regs _regs = *regs;
+ struct cpu_user_regs _regs = *ctxt->regs;
switch ( mode )
{
@@ -628,7 +629,7 @@ x86_emulate_memop(
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
((rc = ops->read_emulated((unsigned long)dst.ptr,
- &dst.val, dst.bytes)) != 0) )
+ &dst.val, dst.bytes, ctxt)) != 0) )
goto done;
break;
}
@@ -670,7 +671,7 @@ x86_emulate_memop(
src.type = OP_MEM;
src.ptr = (unsigned long *)cr2;
if ( (rc = ops->read_emulated((unsigned long)src.ptr,
- &src.val, src.bytes)) != 0 )
+ &src.val, src.bytes, ctxt)) != 0 )
goto done;
src.orig_val = src.val;
break;
@@ -776,7 +777,7 @@ x86_emulate_memop(
if ( mode == X86EMUL_MODE_PROT64 )
dst.bytes = 8;
if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
- &dst.val, dst.bytes)) != 0 )
+ &dst.val, dst.bytes, ctxt)) != 0 )
goto done;
register_address_increment(_regs.esp, dst.bytes);
break;
@@ -854,12 +855,12 @@ x86_emulate_memop(
{
dst.bytes = 8;
if ( (rc = ops->read_std((unsigned long)dst.ptr,
- &dst.val, 8)) != 0 )
+ &dst.val, 8, ctxt)) != 0 )
goto done;
}
- register_address_increment(_regs.esp, -dst.bytes);
+ register_address_increment(_regs.esp, -(int)dst.bytes);
if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
- dst.val, dst.bytes)) != 0 )
+ dst.val, dst.bytes, ctxt)) != 0 )
goto done;
dst.val = dst.orig_val; /* skanky: disable writeback */
break;
@@ -887,10 +888,11 @@ x86_emulate_memop(
case OP_MEM:
if ( lock_prefix )
rc = ops->cmpxchg_emulated(
- (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
+ (unsigned long)dst.ptr, dst.orig_val,
+ dst.val, dst.bytes, ctxt);
else
rc = ops->write_emulated(
- (unsigned long)dst.ptr, dst.val, dst.bytes);
+ (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt);
if ( rc != 0 )
goto done;
default:
@@ -899,7 +901,7 @@ x86_emulate_memop(
}
/* Commit shadow register state. */
- *regs = _regs;
+ *ctxt->regs = _regs;
done:
return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
@@ -911,11 +913,11 @@ x86_emulate_memop(
{
if ( _regs.ecx == 0 )
{
- regs->eip = _regs.eip;
+ ctxt->regs->eip = _regs.eip;
goto done;
}
_regs.ecx--;
- _regs.eip = regs->eip;
+ _regs.eip = ctxt->regs->eip;
}
switch ( b )
{
@@ -928,20 +930,21 @@ x86_emulate_memop(
dst.ptr = (unsigned long *)cr2;
if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
_regs.esi),
- &dst.val, dst.bytes)) != 0 )
+ &dst.val, dst.bytes, ctxt)) != 0 )
goto done;
}
else
{
/* Read fault: source is special memory. */
dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
- if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+ if ( (rc = ops->read_emulated(cr2, &dst.val,
+ dst.bytes, ctxt)) != 0 )
goto done;
}
register_address_increment(
- _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
register_address_increment(
- _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
break;
case 0xa6 ... 0xa7: /* cmps */
DPRINTF("Urk! I don't handle CMPS.\n");
@@ -952,16 +955,16 @@ x86_emulate_memop(
dst.ptr = (unsigned long *)cr2;
dst.val = _regs.eax;
register_address_increment(
- _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
break;
case 0xac ... 0xad: /* lods */
dst.type = OP_REG;
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.ptr = (unsigned long *)&_regs.eax;
- if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+ if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
goto done;
register_address_increment(
- _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
break;
case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n");
@@ -1074,8 +1077,8 @@ x86_emulate_memop(
#if defined(__i386__)
{
unsigned long old_lo, old_hi;
- if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) ||
- ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) )
+ if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) ||
+ ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) )
goto done;
if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
{
@@ -1090,8 +1093,8 @@ x86_emulate_memop(
}
else
{
- if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi,
- _regs.ebx, _regs.ecx)) != 0 )
+ if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx,
+ _regs.ecx, ctxt)) != 0 )
goto done;
_regs.eflags |= EFLG_ZF;
}
@@ -1100,7 +1103,7 @@ x86_emulate_memop(
#elif defined(__x86_64__)
{
unsigned long old, new;
- if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 )
+ if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 )
goto done;
if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
@@ -1112,7 +1115,7 @@ x86_emulate_memop(
else
{
new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
- if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 )
+ if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 )
goto done;
_regs.eflags |= EFLG_ZF;
}
@@ -1136,7 +1139,8 @@ x86_emulate_read_std(
x86_emulate_read_std(
unsigned long addr,
unsigned long *val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
*val = 0;
if ( copy_from_user((void *)val, (void *)addr, bytes) )
@@ -1151,7 +1155,8 @@ x86_emulate_write_std(
x86_emulate_write_std(
unsigned long addr,
unsigned long val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
if ( copy_to_user((void *)addr, (void *)&val, bytes) )
{
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/Makefile
--- a/xen/common/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/Makefile Fri May 26 13:41:49 2006 -0600
@@ -13,6 +13,7 @@ obj-y += page_alloc.o
obj-y += page_alloc.o
obj-y += rangeset.o
obj-y += sched_bvt.o
+obj-y += sched_credit.o
obj-y += sched_sedf.o
obj-y += schedule.o
obj-y += softirq.o
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/grant_table.c
--- a/xen/common/grant_table.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/grant_table.c Fri May 26 13:41:49 2006 -0600
@@ -505,15 +505,12 @@ gnttab_setup_table(
goto out;
}
- if ( op.nr_frames <= NR_GRANT_FRAMES )
- {
- ASSERT(d->grant_table != NULL);
- op.status = GNTST_okay;
- for ( i = 0; i < op.nr_frames; i++ )
- {
- gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
- (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
- }
+ ASSERT(d->grant_table != NULL);
+ op.status = GNTST_okay;
+ for ( i = 0; i < op.nr_frames; i++ )
+ {
+ gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
+ (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
}
put_domain(d);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/kernel.c
--- a/xen/common/kernel.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/kernel.c Fri May 26 13:41:49 2006 -0600
@@ -191,12 +191,11 @@ long do_xen_version(int cmd, XEN_GUEST_H
switch ( fi.submap_idx )
{
case 0:
- fi.submap = 0;
+ fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
if ( shadow_mode_translate(current->domain) )
fi.submap |=
(1U << XENFEAT_writable_page_tables) |
- (1U << XENFEAT_auto_translated_physmap) |
- (1U << XENFEAT_pae_pgdir_above_4gb);
+ (1U << XENFEAT_auto_translated_physmap);
if ( supervisor_mode_kernel )
fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
break;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/schedule.c
--- a/xen/common/schedule.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/schedule.c Fri May 26 13:41:49 2006 -0600
@@ -50,9 +50,11 @@ struct schedule_data schedule_data[NR_CP
extern struct scheduler sched_bvt_def;
extern struct scheduler sched_sedf_def;
+extern struct scheduler sched_credit_def;
static struct scheduler *schedulers[] = {
&sched_bvt_def,
&sched_sedf_def,
+ &sched_credit_def,
NULL
};
@@ -639,6 +641,8 @@ static void t_timer_fn(void *unused)
page_scrub_schedule_work();
+ SCHED_OP(tick, cpu);
+
set_timer(&t_timer[cpu], NOW() + MILLISECS(10));
}
@@ -681,6 +685,7 @@ void __init scheduler_init(void)
printk("Could not find scheduler: %s\n", opt_sched);
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
+ SCHED_OP(init);
if ( idle_vcpu[0] != NULL )
{
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/trace.c
--- a/xen/common/trace.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/trace.c Fri May 26 13:41:49 2006 -0600
@@ -91,6 +91,7 @@ static int alloc_trace_bufs(void)
if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
{
printk("Xen trace buffers: memory allocation failed\n");
+ opt_tbuf_size = 0;
return -EINVAL;
}
@@ -135,10 +136,7 @@ static int tb_set_size(int size)
opt_tbuf_size = size;
if ( alloc_trace_bufs() != 0 )
- {
- opt_tbuf_size = 0;
- return -EINVAL;
- }
+ return -EINVAL;
printk("Xen trace buffers: initialized\n");
return 0;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/domain.h Fri May 26 13:41:49 2006 -0600
@@ -120,6 +120,18 @@ struct arch_vcpu
struct vcpu_guest_context guest_context
__attribute__((__aligned__(16)));
+#ifdef CONFIG_X86_PAE
+ /*
+ * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
+ * supplies a >=4GB PAE L3 table. We need two because we cannot set up
+ * an L3 table while we are currently running on it (without using
+ * expensive atomic 64-bit operations).
+ */
+ l3_pgentry_t lowmem_l3tab[2][4] __attribute__((__aligned__(32)));
+ unsigned long lowmem_l3tab_high_mfn[2]; /* The >=4GB MFN being shadowed. */
+ unsigned int lowmem_l3tab_inuse; /* Which lowmem_l3tab is in use? */
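+ /*
+ * The in-use slot simply alternates on each load of a high pgdir, so
+ * the slot being overwritten is never the one CR3 currently points at.
+ */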
+#endif
+
unsigned long flags; /* TF_ */
void (*schedule_tail) (struct vcpu *);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/domain.h Fri May 26 13:41:49 2006 -0600
@@ -35,9 +35,9 @@ struct hvm_domain {
unsigned int nr_vcpus;
unsigned int apic_enabled;
unsigned int pae_enabled;
-
- struct hvm_virpit vpit;
- u64 guest_time;
+ s64 tsc_frequency;
+ struct pl_time pl_time;
+
struct hvm_virpic vpic;
struct hvm_vioapic vioapic;
struct hvm_io_handler io_handler;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/svm/intr.h
--- a/xen/include/asm-x86/hvm/svm/intr.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/svm/intr.h Fri May 26 13:41:49 2006 -0600
@@ -21,7 +21,6 @@
#ifndef __ASM_X86_HVM_SVM_INTR_H__
#define __ASM_X86_HVM_SVM_INTR_H__
-extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit);
extern void svm_intr_assist(void);
extern void svm_intr_assist_update(struct vcpu *v, int highest_vector);
extern void svm_intr_assist_test_valid(struct vcpu *v,
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/svm/svm.h Fri May 26 13:41:49 2006 -0600
@@ -48,7 +48,6 @@ extern void svm_do_launch(struct vcpu *v
extern void svm_do_launch(struct vcpu *v);
extern void svm_do_resume(struct vcpu *v);
extern void svm_set_guest_time(struct vcpu *v, u64 gtime);
-extern u64 svm_get_guest_time(struct vcpu *v);
extern void arch_svm_do_resume(struct vcpu *v);
extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa);
/* For debugging. Remove when no longer needed. */
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/vcpu.h Fri May 26 13:41:49 2006 -0600
@@ -32,6 +32,9 @@ struct hvm_vcpu {
unsigned long ioflags;
struct mmio_op mmio_op;
struct vlapic *vlapic;
+ s64 cache_tsc_offset;
+ u64 guest_time;
+
/* For AP startup */
unsigned long init_sipi_sipi_state;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri May 26 13:41:49 2006 -0600
@@ -34,7 +34,6 @@ extern void arch_vmx_do_launch(struct vc
extern void arch_vmx_do_launch(struct vcpu *);
extern void arch_vmx_do_resume(struct vcpu *);
extern void set_guest_time(struct vcpu *v, u64 gtime);
-extern u64 get_guest_time(struct vcpu *v);
extern unsigned int cpu_rev;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vpit.h
--- a/xen/include/asm-x86/hvm/vpit.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/vpit.h Fri May 26 13:41:49 2006 -0600
@@ -29,9 +29,7 @@
#include <asm/hvm/vpic.h>
#define PIT_FREQ 1193181
-
-#define PIT_BASE 0x40
-#define HVM_PIT_ACCEL_MODE 2
+#define PIT_BASE 0x40
typedef struct PITChannelState {
int count; /* can be 65536 */
@@ -48,47 +46,56 @@ typedef struct PITChannelState {
u8 gate; /* timer start */
s64 count_load_time;
/* irq handling */
- s64 next_transition_time;
- int irq;
- struct hvm_time_info *hvm_time;
- u32 period; /* period(ns) based on count */
+ struct vcpu *vcpu;
+ struct periodic_time *pt;
} PITChannelState;
-
-struct hvm_time_info {
- /* extra info for the mode 2 channel */
- struct timer pit_timer;
- struct vcpu *vcpu; /* which vcpu the ac_timer bound to */
- u64 period_cycles; /* pit frequency in cpu cycles */
- s_time_t count_advance; /* accumulated count advance since last fire */
- s_time_t count_point; /* last point accumulating count advance */
- unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
- int first_injected; /* flag to prevent shadow window */
- s64 cache_tsc_offset; /* cache of VMCS TSC_OFFSET offset */
- u64 last_pit_gtime; /* guest time when last pit is injected */
+
+/*
+ * Abstraction layer for periodic time sources; also covers one-shot timers.
+ */
+struct periodic_time {
+ char enabled; /* enabled */
+ char one_shot; /* one-shot timer? */
+ char irq;
+ char first_injected; /* flag to prevent shadow window */
+ u32 pending_intr_nr; /* counter of pending timer interrupts */
+ u32 period; /* period in ns */
+ u64 period_cycles; /* period in cpu cycles */
+ s_time_t scheduled; /* scheduled timer interrupt */
+ u64 last_plt_gtime; /* platform time when the last IRQ was injected */
+ struct timer timer; /* ac_timer */
};
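+
+/*
+ * Usage sketch (period is in ns, per the field above): a 10ms periodic
+ * source is set up with create_periodic_time(v, 10000000, irq, 0) and torn
+ * down with destroy_periodic_time(pt); ticks that fire while the VCPU is
+ * descheduled accumulate in pending_intr_nr for later injection.
+ */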
-typedef struct hvm_virpit {
+typedef struct PITState {
PITChannelState channels[3];
- struct hvm_time_info time_info;
int speaker_data_on;
int dummy_refresh_clock;
-}hvm_virpit;
+} PITState;
+struct pl_time { /* platform time */
+ struct periodic_time periodic_tm;
+ struct PITState vpit;
+ /* TODO: RTC/ACPI time */
+};
-static __inline__ s_time_t get_pit_scheduled(
- struct vcpu *v,
- struct hvm_virpit *vpit)
+static __inline__ s_time_t get_scheduled(
+ struct vcpu *v, int irq,
+ struct periodic_time *pt)
{
- struct PITChannelState *s = &(vpit->channels[0]);
- if ( is_irq_enabled(v, 0) ) {
- return s->next_transition_time;
+ if ( is_irq_enabled(v, irq) ) {
+ return pt->scheduled;
}
else
return -1;
}
/* to hook the ioreq packet to get the PIT initialization info */
-extern void pit_init(struct hvm_virpit *pit, struct vcpu *v);
-extern void pickup_deactive_ticks(struct hvm_virpit *vpit);
+extern void hvm_hooks_assist(struct vcpu *v);
+extern void pickup_deactive_ticks(struct periodic_time *vpit);
+extern u64 hvm_get_guest_time(struct vcpu *v);
+extern struct periodic_time *create_periodic_time(struct vcpu *v, u32 period, char irq, char one_shot);
+extern void destroy_periodic_time(struct periodic_time *pt);
+void pit_init(struct vcpu *v, unsigned long cpu_khz);
+void pt_timer_fn(void *data);
#endif /* __ASM_X86_HVM_VPIT_H__ */
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/string.h
--- a/xen/include/asm-x86/string.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/string.h Fri May 26 13:41:49 2006 -0600
@@ -2,152 +2,6 @@
#define __X86_STRING_H__
#include <xen/config.h>
-
-#define __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dest, const char *src)
-{
- long d0, d1, d2;
- __asm__ __volatile__ (
- "1: lodsb \n"
- " stosb \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2)
- : "0" (src), "1" (dest) : "memory" );
- return dest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static inline char *strncpy(char *dest, const char *src, size_t count)
-{
- long d0, d1, d2, d3;
- __asm__ __volatile__ (
- "1: dec %2 \n"
- " js 2f \n"
- " lodsb \n"
- " stosb \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- " rep ; stosb \n"
- "2: \n"
- : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
- : "0" (src), "1" (dest), "2" (count) : "memory" );
- return dest;
-}
-
-#define __HAVE_ARCH_STRCAT
-static inline char *strcat(char *dest, const char *src)
-{
- long d0, d1, d2, d3;
- __asm__ __volatile__ (
- " repne ; scasb \n"
- " dec %1 \n"
- "1: lodsb \n"
- " stosb \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" );
- return dest;
-}
-
-#define __HAVE_ARCH_STRNCAT
-static inline char *strncat(char *dest, const char *src, size_t count)
-{
- long d0, d1, d2, d3;
- __asm__ __volatile__ (
- " repne ; scasb \n"
- " dec %1 \n"
- " mov %8,%3 \n"
- "1: dec %3 \n"
- " js 2f \n"
- " lodsb \n"
- " stosb \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- "2: xor %%eax,%%eax\n"
- " stosb"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count)
- : "memory" );
- return dest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static inline int strcmp(const char *cs, const char *ct)
-{
- long d0, d1;
- register int __res;
- __asm__ __volatile__ (
- "1: lodsb \n"
- " scasb \n"
- " jne 2f \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- " xor %%eax,%%eax\n"
- " jmp 3f \n"
- "2: sbb %%eax,%%eax\n"
- " or $1,%%al \n"
- "3: \n"
- : "=a" (__res), "=&S" (d0), "=&D" (d1)
- : "1" (cs), "2" (ct) );
- return __res;
-}
-
-#define __HAVE_ARCH_STRNCMP
-static inline int strncmp(const char *cs, const char *ct, size_t count)
-{
- long d0, d1, d2;
- register int __res;
- __asm__ __volatile__ (
- "1: dec %3 \n"
- " js 2f \n"
- " lodsb \n"
- " scasb \n"
- " jne 3f \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- "2: xor %%eax,%%eax\n"
- " jmp 4f \n"
- "3: sbb %%eax,%%eax\n"
- " or $1,%%al \n"
- "4: \n"
- : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
- : "1" (cs), "2" (ct), "3" (count) );
- return __res;
-}
-
-#define __HAVE_ARCH_STRCHR
-static inline char *strchr(const char *s, int c)
-{
- long d0;
- register char *__res;
- __asm__ __volatile__ (
- " mov %%al,%%ah \n"
- "1: lodsb \n"
- " cmp %%ah,%%al \n"
- " je 2f \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- " mov $1,%1 \n"
- "2: mov %1,%0 \n"
- " dec %0 \n"
- : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) );
- return __res;
-}
-
-#define __HAVE_ARCH_STRLEN
-static inline size_t strlen(const char *s)
-{
- long d0;
- register int __res;
- __asm__ __volatile__ (
- " repne ; scasb \n"
- " notl %0 \n"
- " decl %0 \n"
- : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) );
- return __res;
-}
static inline void *__variable_memcpy(void *to, const void *from, size_t n)
{
@@ -258,22 +112,6 @@ extern void *memmove(void *dest, const v
#define __HAVE_ARCH_MEMCMP
#define memcmp __builtin_memcmp
-#define __HAVE_ARCH_MEMCHR
-static inline void *memchr(const void *cs, int c, size_t count)
-{
- long d0;
- register void *__res;
- if ( count == 0 )
- return NULL;
- __asm__ __volatile__ (
- " repne ; scasb\n"
- " je 1f \n"
- " mov $1,%0 \n"
- "1: dec %0 \n"
- : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) );
- return __res;
-}
-
static inline void *__memset_generic(void *s, char c, size_t count)
{
long d0, d1;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/x86_emulate.h Fri May 26 13:41:49 2006 -0600
@@ -9,8 +9,10 @@
#ifndef __X86_EMULATE_H__
#define __X86_EMULATE_H__
-/*
- * x86_mem_emulator:
+struct x86_emulate_ctxt;
+
+/*
+ * x86_emulate_ops:
*
* These operations represent the instruction emulator's interface to memory.
* There are two categories of operation: those that act on ordinary memory
@@ -47,7 +49,7 @@
#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
#define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */
#define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */
-struct x86_mem_emulator
+struct x86_emulate_ops
{
/*
* read_std: Read bytes of standard (non-emulated/special) memory.
@@ -59,7 +61,8 @@ struct x86_mem_emulator
int (*read_std)(
unsigned long addr,
unsigned long *val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
/*
* write_std: Write bytes of standard (non-emulated/special) memory.
@@ -71,7 +74,8 @@ struct x86_mem_emulator
int (*write_std)(
unsigned long addr,
unsigned long val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
/*
* read_emulated: Read bytes from emulated/special memory area.
@@ -82,7 +86,8 @@ struct x86_mem_emulator
int (*read_emulated)(
unsigned long addr,
unsigned long *val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
/*
* write_emulated: Write bytes to emulated/special memory area.
@@ -93,7 +98,8 @@ struct x86_mem_emulator
int (*write_emulated)(
unsigned long addr,
unsigned long val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
/*
* cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -107,11 +113,12 @@ struct x86_mem_emulator
unsigned long addr,
unsigned long old,
unsigned long new,
- unsigned int bytes);
-
- /*
- * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
- * emulated/special memory area.
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
+ * emulated/special memory area.
* @addr: [IN ] Linear address to access.
* @old: [IN ] Value expected to be current at @addr.
* @new: [IN ] Value to write to @addr.
@@ -126,7 +133,8 @@ struct x86_mem_emulator
unsigned long old_lo,
unsigned long old_hi,
unsigned long new_lo,
- unsigned long new_hi);
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt);
};
/* Standard reader/writer functions that callers may wish to use. */
@@ -134,14 +142,28 @@ x86_emulate_read_std(
x86_emulate_read_std(
unsigned long addr,
unsigned long *val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
extern int
x86_emulate_write_std(
unsigned long addr,
unsigned long val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
struct cpu_user_regs;
+
+struct x86_emulate_ctxt
+{
+ /* Register state before/after emulation. */
+ struct cpu_user_regs *regs;
+
+ /* Linear faulting address (if emulating a page-faulting instruction). */
+ unsigned long cr2;
+
+ /* Emulated execution mode, represented by an X86EMUL_MODE value. */
+ int mode;
+};
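+
+/*
+ * Callers fill in a context before invoking the emulator, e.g. as
+ * ptwr_do_page_fault() does:
+ *
+ * emul_ctxt.regs = guest_cpu_user_regs();
+ * emul_ctxt.cr2 = addr;
+ * emul_ctxt.mode = X86EMUL_MODE_HOST;
+ * x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops);
+ */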
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
@@ -159,25 +181,19 @@ struct cpu_user_regs;
/*
* x86_emulate_memop: Emulate an instruction that faulted attempting to
* read/write a 'special' memory area.
- * @regs: Register state at time of fault.
- * @cr2: Linear faulting address within an emulated/special memory area.
- * @ops: Interface to access special memory.
- * @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
* Returns -1 on failure, 0 on success.
*/
-extern int
+int
x86_emulate_memop(
- struct cpu_user_regs *regs,
- unsigned long cr2,
- struct x86_mem_emulator *ops,
- int mode);
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops);
/*
* Given the 'reg' portion of a ModRM byte, and a register block, return a
* pointer into the block that addresses the relevant register.
* @highbyte_regs specifies whether to decode AH,CH,DH,BH.
*/
-extern void *
+void *
decode_register(
uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/public/io/xenbus.h
--- a/xen/include/public/io/xenbus.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/public/io/xenbus.h Fri May 26 13:41:49 2006 -0600
@@ -9,34 +9,37 @@
#ifndef _XEN_PUBLIC_IO_XENBUS_H
#define _XEN_PUBLIC_IO_XENBUS_H
-/* The state of either end of the Xenbus, i.e. the current communication
- status of initialisation across the bus. States here imply nothing about
- the state of the connection between the driver and the kernel's device
- layers. */
-typedef enum
-{
- XenbusStateUnknown = 0,
- XenbusStateInitialising = 1,
- XenbusStateInitWait = 2, /* Finished early initialisation, but waiting
- for information from the peer or hotplug
- scripts. */
- XenbusStateInitialised = 3, /* Initialised and waiting for a connection
- from the peer. */
- XenbusStateConnected = 4,
- XenbusStateClosing = 5, /* The device is being closed due to an error
- or an unplug event. */
- XenbusStateClosed = 6
-} XenbusState;
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus. States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+ XenbusStateUnknown = 0,
+ XenbusStateInitialising = 1,
+
+ /*
+ * InitWait: Finished early initialisation but waiting for information
+ * from the peer or hotplug scripts.
+ */
+ XenbusStateInitWait = 2,
+
+ /*
+ * Initialised: Waiting for a connection from the peer.
+ */
+ XenbusStateInitialised = 3,
+
+ XenbusStateConnected = 4,
+
+ /*
+ * Closing: The device is being closed due to an error or an unplug event.
+ */
+ XenbusStateClosing = 5,
+
+ XenbusStateClosed = 6
+};
+typedef enum xenbus_state XenbusState;
#endif /* _XEN_PUBLIC_IO_XENBUS_H */
-
-/*
- * Local variables:
- * c-file-style: "linux"
- * indent-tabs-mode: t
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/public/sched_ctl.h
--- a/xen/include/public/sched_ctl.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/public/sched_ctl.h Fri May 26 13:41:49 2006 -0600
@@ -10,6 +10,7 @@
/* Scheduler types. */
#define SCHED_BVT 0
#define SCHED_SEDF 4
+#define SCHED_CREDIT 5
/* Set or get info? */
#define SCHED_INFO_PUT 0
@@ -48,6 +49,10 @@ struct sched_adjdom_cmd {
uint32_t extratime;
uint32_t weight;
} sedf;
+ struct csched_domain {
+ uint16_t weight;
+ uint16_t cap;
+ } credit;
} u;
};
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/xen/sched-if.h Fri May 26 13:41:49 2006 -0600
@@ -58,6 +58,8 @@ struct scheduler {
char *opt_name; /* option name for this scheduler */
unsigned int sched_id; /* ID for this scheduler */
+ void (*init) (void);
+ void (*tick) (unsigned int cpu);
int (*alloc_task) (struct vcpu *);
void (*add_task) (struct vcpu *);
void (*free_task) (struct domain *);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/xen/softirq.h Fri May 26 13:41:49 2006 -0600
@@ -26,6 +26,19 @@ asmlinkage void do_softirq(void);
asmlinkage void do_softirq(void);
extern void open_softirq(int nr, softirq_handler handler);
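+
+/*
+ * Raise softirq @nr on every CPU in @mask, skipping CPUs where it is
+ * already pending; e.g. the credit scheduler tickles all idlers at once
+ * with cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ).
+ */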
+static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr)
+{
+ int cpu;
+
+ for_each_cpu_mask(cpu, mask)
+ {
+ if ( test_and_set_bit(nr, &softirq_pending(cpu)) )
+ cpu_clear(cpu, mask);
+ }
+
+ smp_send_event_check_mask(mask);
+}
+
static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
{
if ( !test_and_set_bit(nr, &softirq_pending(cpu)) )
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,185 @@
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/xenbus.h>
+
+/*
+ * Set of CPUs that remote admin software will allow us to bring online.
+ * Notified to us via xenbus.
+ */
+static cpumask_t xenbus_allowed_cpumask;
+
+/* Set of CPUs that local admin will allow us to bring online. */
+static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
+
+static int local_cpu_hotplug_request(void)
+{
+ /*
+ * We assume a CPU hotplug request comes from local admin if it is made
+ * via a userspace process (i.e., one with a real mm_struct).
+ */
+ return (current->mm != NULL);
+}
+
+static void vcpu_hotplug(unsigned int cpu)
+{
+ int err;
+ char dir[32], state[32];
+
+ if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
+ return;
+
+ sprintf(dir, "cpu/%d", cpu);
+ err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
+ if (err != 1) {
+ printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+ return;
+ }
+
+ if (strcmp(state, "online") == 0) {
+ cpu_set(cpu, xenbus_allowed_cpumask);
+ (void)cpu_up(cpu);
+ } else if (strcmp(state, "offline") == 0) {
+ cpu_clear(cpu, xenbus_allowed_cpumask);
+ (void)cpu_down(cpu);
+ } else {
+ printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
+ state, cpu);
+ }
+}
+
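+/*
+ * Watch callback: fires for any change under the "cpu" subtree of xenstore;
+ * e.g. a write to cpu/2/availability re-evaluates CPU2 via vcpu_hotplug(2).
+ */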
+static void handle_vcpu_hotplug_event(
+ struct xenbus_watch *watch, const char **vec, unsigned int len)
+{
+ int cpu;
+ char *cpustr;
+ const char *node = vec[XS_WATCH_PATH];
+
+ if ((cpustr = strstr(node, "cpu/")) != NULL) {
+ sscanf(cpustr, "cpu/%d", &cpu);
+ vcpu_hotplug(cpu);
+ }
+}
+
+static int smpboot_cpu_notify(struct notifier_block *notifier,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu;
+
+ /*
+ * We do this in a callback notifier rather than __cpu_disable()
+ * because local_cpu_hotplug_request() does not work in the latter
+ * as it's always executed from within a stopmachine kthread.
+ */
+ if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
+ cpu_clear(cpu, local_allowed_cpumask);
+
+ return NOTIFY_OK;
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+ unsigned long event, void *data)
+{
+ int i;
+
+ static struct xenbus_watch cpu_watch = {
+ .node = "cpu",
+ .callback = handle_vcpu_hotplug_event,
+ .flags = XBWF_new_thread };
+ (void)register_xenbus_watch(&cpu_watch);
+
+ if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
+ for_each_cpu(i)
+ vcpu_hotplug(i);
+ printk(KERN_INFO "Brought up %ld CPUs\n",
+ (long)num_online_cpus());
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+ static struct notifier_block hotplug_cpu = {
+ .notifier_call = smpboot_cpu_notify };
+ static struct notifier_block xsn_cpu = {
+ .notifier_call = setup_cpu_watcher };
+
+ register_cpu_notifier(&hotplug_cpu);
+ register_xenstore_notifier(&xsn_cpu);
+
+ return 0;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
+int smp_suspend(void)
+{
+ int i, err;
+
+ lock_cpu_hotplug();
+
+ /*
+ * Take all other CPUs offline. We hold the hotplug mutex to
+ * avoid other processes bringing up CPUs under our feet.
+ */
+ while (num_online_cpus() > 1) {
+ unlock_cpu_hotplug();
+ for_each_online_cpu(i) {
+ if (i == 0)
+ continue;
+ err = cpu_down(i);
+ if (err) {
+ printk(KERN_CRIT "Failed to take all CPUs "
+ "down: %d.\n", err);
+ for_each_cpu(i)
+ vcpu_hotplug(i);
+ return err;
+ }
+ }
+ lock_cpu_hotplug();
+ }
+
+ return 0;
+}
+
+void smp_resume(void)
+{
+ int cpu;
+
+ for_each_cpu(cpu)
+ cpu_initialize_context(cpu);
+
+ unlock_cpu_hotplug();
+
+ for_each_cpu(cpu)
+ vcpu_hotplug(cpu);
+}
+
+int cpu_up_is_allowed(unsigned int cpu)
+{
+ int rc = 0;
+
+ if (local_cpu_hotplug_request()) {
+ cpu_set(cpu, local_allowed_cpumask);
+ if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
+ printk("%s: attempt to bring up CPU %u disallowed by "
+ "remote admin.\n", __FUNCTION__, cpu);
+ rc = -EBUSY;
+ }
+ } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
+ !cpu_isset(cpu, xenbus_allowed_cpumask)) {
+ rc = -EBUSY;
+ }
+
+ return rc;
+}
+
+void init_xenbus_allowed_cpumask(void)
+{
+ xenbus_allowed_cpumask = cpu_present_map;
+}
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,63 @@
+/*
+ * structures and definitions for the int 15, ax=e820 memory map
+ * scheme.
+ *
+ * In a nutshell, setup.S populates a scratch table in the
+ * empty_zero_block that contains a list of usable address/size
+ * tuples. In setup.c, this information is transferred into the e820map,
+ * and in init.c/numa.c, that new information is used to mark pages
+ * reserved or not.
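+ *
+ * Each map entry is an (addr, size, type) triple; for example, usable RAM
+ * from 1MB to 1GB appears as { .addr = 0x100000, .size = 0x3ff00000,
+ * .type = E820_RAM }.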
+ */
+#ifndef __E820_HEADER
+#define __E820_HEADER
+
+#include <linux/mmzone.h>
+
+#define E820MAP 0x2d0 /* our map */
+#define E820MAX 128 /* number of entries in E820MAP */
+#define E820NR 0x1e8 /* # entries in E820MAP */
+
+#define E820_RAM 1
+#define E820_RESERVED 2
+#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS 4
+
+#define HIGH_MEMORY (1024*1024)
+
+#define LOWMEMSIZE() (0x9f000)
+
+#ifndef __ASSEMBLY__
+struct e820entry {
+ u64 addr; /* start of memory segment */
+ u64 size; /* size of memory segment */
+ u32 type; /* type of memory segment */
+} __attribute__((packed));
+
+struct e820map {
+ int nr_map;
+ struct e820entry map[E820MAX];
+};
+
+extern unsigned long find_e820_area(unsigned long start, unsigned long end,
+ unsigned size);
+extern void add_memory_region(unsigned long start, unsigned long size,
+ int type);
+extern void setup_memory_region(void);
+extern void contig_e820_setup(void);
+extern unsigned long e820_end_of_ram(void);
+extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
+extern void e820_print_map(char *who);
+extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
+
+extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start, unsigned long end);
+extern void e820_setup_gap(struct e820entry *e820, int nr_map);
+extern unsigned long e820_hole_size(unsigned long start_pfn,
+ unsigned long end_pfn);
+
+extern void __init parse_memopt(char *p, char **end);
+extern void __init parse_memmapopt(char *p, char **end);
+
+extern struct e820map e820;
+#endif/*!__ASSEMBLY__*/
+
+#endif/*__E820_HEADER*/
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/xen/cpu_hotplug.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,42 @@
+#ifndef __XEN_CPU_HOTPLUG_H__
+#define __XEN_CPU_HOTPLUG_H__
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+
+#if defined(CONFIG_HOTPLUG_CPU)
+
+#if defined(CONFIG_X86)
+void cpu_initialize_context(unsigned int cpu);
+#else
+#define cpu_initialize_context(cpu) ((void)0)
+#endif
+
+int cpu_up_is_allowed(unsigned int cpu);
+void init_xenbus_allowed_cpumask(void);
+int smp_suspend(void);
+void smp_resume(void);
+
+#else /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#define cpu_up_is_allowed(cpu) (1)
+#define init_xenbus_allowed_cpumask() ((void)0)
+
+static inline int smp_suspend(void)
+{
+ if (num_online_cpus() > 1) {
+ printk(KERN_WARNING "Can't suspend SMP guests "
+ "without CONFIG_HOTPLUG_CPU\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static inline void smp_resume(void)
+{
+}
+
+#endif /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#endif /* __XEN_CPU_HOTPLUG_H__ */
diff -r 9d52a66c7499 -r c073ebdbde8c patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,18 @@
+diff -ru ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c
+--- ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c 2006-05-02 22:38:44.000000000 +0100
++++ ./drivers/ide/ide-lib.c 2006-05-24 18:37:05.000000000 +0100
+@@ -410,10 +410,10 @@
+ {
+ u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
+
+- if (!PCI_DMA_BUS_IS_PHYS) {
+- addr = BLK_BOUNCE_ANY;
+- } else if (on && drive->media == ide_disk) {
+- if (HWIF(drive)->pci_dev)
++ if (on && drive->media == ide_disk) {
++ if (!PCI_DMA_BUS_IS_PHYS)
++ addr = BLK_BOUNCE_ANY;
++ else if (HWIF(drive)->pci_dev)
+ addr = HWIF(drive)->pci_dev->dma_mask;
+ }
+
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_csched.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_csched.c Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,50 @@
+/****************************************************************************
+ * (C) 2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ * File: xc_csched.c
+ * Author: Emmanuel Ackaouy
+ *
+ * Description: XC Interface to the credit scheduler
+ *
+ */
+#include "xc_private.h"
+
+
+int
+xc_csched_domain_set(
+ int xc_handle,
+ uint32_t domid,
+ struct csched_domain *sdom)
+{
+ DECLARE_DOM0_OP;
+
+ op.cmd = DOM0_ADJUSTDOM;
+ op.u.adjustdom.domain = (domid_t) domid;
+ op.u.adjustdom.sched_id = SCHED_CREDIT;
+ op.u.adjustdom.direction = SCHED_INFO_PUT;
+ op.u.adjustdom.u.credit = *sdom;
+
+ return do_dom0_op(xc_handle, &op);
+}
+
+int
+xc_csched_domain_get(
+ int xc_handle,
+ uint32_t domid,
+ struct csched_domain *sdom)
+{
+ DECLARE_DOM0_OP;
+ int err;
+
+ op.cmd = DOM0_ADJUSTDOM;
+ op.u.adjustdom.domain = (domid_t) domid;
+ op.u.adjustdom.sched_id = SCHED_CREDIT;
+ op.u.adjustdom.direction = SCHED_INFO_GET;
+
+ err = do_dom0_op(xc_handle, &op);
+ if ( err == 0 )
+ *sdom = op.u.adjustdom.u.credit;
+
+ return err;
+}
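+
+/*
+ * Example: give a domain twice the default share (the credit scheduler's
+ * default weight is 256) and leave it uncapped:
+ *
+ * struct csched_domain sdom = { .weight = 512, .cap = 0 };
+ * xc_csched_domain_set(xc_handle, domid, &sdom);
+ */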
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_linux.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenstore/xenstored_linux.c Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/mman.h>
+
+#include "xenstored_core.h"
+
+#define XENSTORED_PROC_KVA "/proc/xen/xsd_kva"
+#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
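+
+/*
+ * Both files are exported by the Xen-aware dom0 kernel: xsd_kva is mmap()ed
+ * below to reach the xenstore interface page, while xsd_port reads back as
+ * the store event channel's port number in decimal, e.g. "2".
+ */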
+
+evtchn_port_t xenbus_evtchn(void)
+{
+ int fd;
+ int rc;
+ evtchn_port_t port;
+ char str[20];
+
+ fd = open(XENSTORED_PROC_PORT, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ rc = read(fd, str, sizeof(str) - 1);
+ if (rc == -1)
+ {
+ int err = errno;
+ close(fd);
+ errno = err;
+ return -1;
+ }
+
+ str[rc] = '\0';
+ port = strtoul(str, NULL, 0);
+
+ close(fd);
+ return port;
+}
+
+void *xenbus_map(void)
+{
+ int fd;
+ void *addr;
+
+ fd = open(XENSTORED_PROC_KVA, O_RDWR);
+ if (fd == -1)
+ return NULL;
+
+ addr = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
+ MAP_SHARED, fd, 0);
+
+ if (addr == MAP_FAILED)
+ addr = NULL;
+
+ close(fd);
+
+ return addr;
+}
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/sched_credit.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/sched_credit.c Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,1233 @@
+/****************************************************************************
+ * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ * File: common/sched_credit.c
+ * Author: Emmanuel Ackaouy
+ *
+ * Description: Credit-based SMP CPU scheduler
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/domain.h>
+#include <xen/delay.h>
+#include <xen/event.h>
+#include <xen/time.h>
+#include <xen/perfc.h>
+#include <xen/sched-if.h>
+#include <xen/softirq.h>
+#include <asm/atomic.h>
+
+
+/*
+ * CSCHED_STATS
+ *
+ * Manage very basic counters and stats.
+ *
+ * Useful for debugging live systems. The stats are displayed
+ * with runq dumps ('r' on the Xen console).
+ */
+#define CSCHED_STATS
+
+
+/*
+ * Basic constants
+ */
+#define CSCHED_TICK 10 /* milliseconds */
+#define CSCHED_TSLICE 30 /* milliseconds */
+#define CSCHED_ACCT_NTICKS 3
+#define CSCHED_ACCT_PERIOD (CSCHED_ACCT_NTICKS * CSCHED_TICK)
+#define CSCHED_DEFAULT_WEIGHT 256
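+
+/* i.e. accounting runs every 3 * 10ms = 30ms, exactly one CSCHED_TSLICE. */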
+
+
+/*
+ * Priorities
+ */
+#define CSCHED_PRI_TS_UNDER -1 /* time-share w/ credits */
+#define CSCHED_PRI_TS_OVER -2 /* time-share w/o credits */
+#define CSCHED_PRI_IDLE -64 /* idle */
+#define CSCHED_PRI_TS_PARKED -65 /* time-share w/ capped credits */
+
+
+/*
+ * Useful macros
+ */
+#define CSCHED_PCPU(_c) ((struct csched_pcpu *)schedule_data[_c].sched_priv)
+#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
+#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
+#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
+
+
+/*
+ * Stats
+ */
+#ifdef CSCHED_STATS
+
+#define CSCHED_STAT(_X) (csched_priv.stats._X)
+#define CSCHED_STAT_DEFINE(_X) uint32_t _X;
+#define CSCHED_STAT_PRINTK(_X) \
+ do \
+ { \
+ printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X)); \
+ } while ( 0 );
+
+#define CSCHED_STATS_EXPAND_SCHED(_MACRO) \
+ _MACRO(vcpu_alloc) \
+ _MACRO(vcpu_add) \
+ _MACRO(vcpu_sleep) \
+ _MACRO(vcpu_wake_running) \
+ _MACRO(vcpu_wake_onrunq) \
+ _MACRO(vcpu_wake_runnable) \
+ _MACRO(vcpu_wake_not_runnable) \
+ _MACRO(dom_free) \
+ _MACRO(schedule) \
+ _MACRO(tickle_local_idler) \
+ _MACRO(tickle_local_over) \
+ _MACRO(tickle_local_under) \
+ _MACRO(tickle_local_other) \
+ _MACRO(acct_run) \
+ _MACRO(acct_no_work) \
+ _MACRO(acct_balance) \
+ _MACRO(acct_reorder) \
+ _MACRO(acct_min_credit) \
+ _MACRO(acct_vcpu_active) \
+ _MACRO(acct_vcpu_idle) \
+ _MACRO(acct_vcpu_credit_min)
+
+#define CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO) \
+ _MACRO(vcpu_migrate) \
+ _MACRO(load_balance_idle) \
+ _MACRO(load_balance_over) \
+ _MACRO(load_balance_other) \
+ _MACRO(steal_trylock_failed) \
+ _MACRO(steal_peer_down) \
+ _MACRO(steal_peer_idle) \
+ _MACRO(steal_peer_running) \
+ _MACRO(steal_peer_pinned) \
+ _MACRO(tickle_idlers_none) \
+ _MACRO(tickle_idlers_some)
+
+#ifndef NDEBUG
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO) \
+ _MACRO(vcpu_check)
+#else
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+#endif
+
+#define CSCHED_STATS_EXPAND(_MACRO) \
+ CSCHED_STATS_EXPAND_SCHED(_MACRO) \
+ CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO) \
+ CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+
+#define CSCHED_STATS_RESET() \
+ do \
+ { \
+ memset(&csched_priv.stats, 0, sizeof(csched_priv.stats)); \
+ } while ( 0 )
+
+#define CSCHED_STATS_DEFINE() \
+ struct \
+ { \
+ CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
+ } stats
+
+#define CSCHED_STATS_PRINTK() \
+ do \
+ { \
+ printk("stats:\n"); \
+ CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
+ } while ( 0 )
+
+#define CSCHED_STAT_CRANK(_X) (CSCHED_STAT(_X)++)
+
+#else /* CSCHED_STATS */
+
+#define CSCHED_STATS_RESET() do {} while ( 0 )
+#define CSCHED_STATS_DEFINE() do {} while ( 0 )
+#define CSCHED_STATS_PRINTK() do {} while ( 0 )
+#define CSCHED_STAT_CRANK(_X) do {} while ( 0 )
+
+#endif /* CSCHED_STATS */
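+
+/*
+ * For example, with CSCHED_STATS defined, CSCHED_STATS_DEFINE() declares an
+ * anonymous struct of uint32_t counters inside csched_private, and
+ * CSCHED_STAT_CRANK(schedule) expands to csched_priv.stats.schedule++.
+ */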
+
+
+/*
+ * Physical CPU
+ */
+struct csched_pcpu {
+ struct list_head runq;
+ uint32_t runq_sort_last;
+};
+
+/*
+ * Virtual CPU
+ */
+struct csched_vcpu {
+ struct list_head runq_elem;
+ struct list_head active_vcpu_elem;
+ struct csched_dom *sdom;
+ struct vcpu *vcpu;
+ atomic_t credit;
+ int credit_last;
+ uint32_t credit_incr;
+ uint32_t state_active;
+ uint32_t state_idle;
+ int16_t pri;
+};
+
+/*
+ * Domain
+ */
+struct csched_dom {
+ struct list_head active_vcpu;
+ struct list_head active_sdom_elem;
+ struct domain *dom;
+ uint16_t active_vcpu_count;
+ uint16_t weight;
+ uint16_t cap;
+};
+
+/*
+ * System-wide private data
+ */
+struct csched_private {
+ spinlock_t lock;
+ struct list_head active_sdom;
+ uint32_t ncpus;
+ unsigned int master;
+ cpumask_t idlers;
+ uint32_t weight;
+ uint32_t credit;
+ int credit_balance;
+ uint32_t runq_sort;
+ CSCHED_STATS_DEFINE();
+};
+
+
+/*
+ * Global variables
+ */
+static struct csched_private csched_priv;
+
+
+
+static inline int
+__vcpu_on_runq(struct csched_vcpu *svc)
+{
+ return !list_empty(&svc->runq_elem);
+}
+
+static inline struct csched_vcpu *
+__runq_elem(struct list_head *elem)
+{
+ return list_entry(elem, struct csched_vcpu, runq_elem);
+}
+
+static inline void
+__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
+{
+ const struct list_head * const runq = RUNQ(cpu);
+ struct list_head *iter;
+
+ BUG_ON( __vcpu_on_runq(svc) );
+ BUG_ON( cpu != svc->vcpu->processor );
+
+ list_for_each( iter, runq )
+ {
+ const struct csched_vcpu * const iter_svc = __runq_elem(iter);
+ if ( svc->pri > iter_svc->pri )
+ break;
+ }
+
+ list_add_tail(&svc->runq_elem, iter);
+}
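+
+/*
+ * Note the strict '>' in the walk above: a newly inserted VCPU queues
+ * behind existing entries of equal priority, so equal-priority VCPUs get
+ * FIFO order on the runq.
+ */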
+
+static inline void
+__runq_remove(struct csched_vcpu *svc)
+{
+ BUG_ON( !__vcpu_on_runq(svc) );
+ list_del_init(&svc->runq_elem);
+}
+
+static inline void
+__runq_tickle(unsigned int cpu, struct csched_vcpu *new)
+{
+ struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr);
+ cpumask_t mask;
+
+ ASSERT(cur);
+ cpus_clear(mask);
+
+ /* If strictly higher priority than current VCPU, signal the CPU */
+ if ( new->pri > cur->pri )
+ {
+ if ( cur->pri == CSCHED_PRI_IDLE )
+ CSCHED_STAT_CRANK(tickle_local_idler);
+ else if ( cur->pri == CSCHED_PRI_TS_OVER )
+ CSCHED_STAT_CRANK(tickle_local_over);
+ else if ( cur->pri == CSCHED_PRI_TS_UNDER )
+ CSCHED_STAT_CRANK(tickle_local_under);
+ else
+ CSCHED_STAT_CRANK(tickle_local_other);
+
+ cpu_set(cpu, mask);
+ }
+
+ /*
+ * If this CPU has at least two runnable VCPUs, we tickle any idlers to
+ * let them know there is runnable work in the system...
+ */
+ if ( cur->pri > CSCHED_PRI_IDLE )
+ {
+ if ( cpus_empty(csched_priv.idlers) )
+ {
+ CSCHED_STAT_CRANK(tickle_idlers_none);
+ }
+ else
+ {
+ CSCHED_STAT_CRANK(tickle_idlers_some);
+ cpus_or(mask, mask, csched_priv.idlers);
+ }
+ }
+
+ /* Send scheduler interrupts to designated CPUs */
+ if ( !cpus_empty(mask) )
+ cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
+}
+
+static void
+csched_pcpu_init(int cpu)
+{
+ struct csched_pcpu *spc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ /* Initialize/update system-wide config */
+ csched_priv.credit += CSCHED_ACCT_PERIOD;
+ if ( csched_priv.ncpus <= cpu )
+ csched_priv.ncpus = cpu + 1;
+ if ( csched_priv.master >= csched_priv.ncpus )
+ csched_priv.master = cpu;
+
+ /* Allocate per-PCPU info */
+ spc = xmalloc(struct csched_pcpu);
+ BUG_ON( spc == NULL );
+ INIT_LIST_HEAD(&spc->runq);
+ spc->runq_sort_last = csched_priv.runq_sort;
+ schedule_data[cpu].sched_priv = spc;
+
+ /* Start off idling... */
+ BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) );
+ cpu_set(cpu, csched_priv.idlers);
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+}
+
+#ifndef NDEBUG
+static inline void
+__csched_vcpu_check(struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+ struct csched_dom * const sdom = svc->sdom;
+
+ BUG_ON( svc->vcpu != vc );
+ BUG_ON( sdom != CSCHED_DOM(vc->domain) );
+ if ( sdom )
+ {
+ BUG_ON( is_idle_vcpu(vc) );
+ BUG_ON( sdom->dom != vc->domain );
+ }
+ else
+ {
+ BUG_ON( !is_idle_vcpu(vc) );
+ }
+
+ CSCHED_STAT_CRANK(vcpu_check);
+}
+#define CSCHED_VCPU_CHECK(_vc) (__csched_vcpu_check(_vc))
+#else
+#define CSCHED_VCPU_CHECK(_vc)
+#endif
+
+static inline int
+__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+{
+ /*
+ * Don't pick up work that's in the peer's scheduling tail. Also only pick
+ * up work that's allowed to run on our CPU.
+ */
+ if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
+ {
+ CSCHED_STAT_CRANK(steal_peer_running);
+ return 0;
+ }
+
+ if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
+ {
+ CSCHED_STAT_CRANK(steal_peer_pinned);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void
+csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
+{
+ struct csched_dom * const sdom = svc->sdom;
+ unsigned long flags;
+
+ /* Update credits */
+ atomic_sub(credit_dec, &svc->credit);
+
+ /* Put this VCPU and domain back on the active list if it was idling */
+ if ( list_empty(&svc->active_vcpu_elem) )
+ {
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ if ( list_empty(&svc->active_vcpu_elem) )
+ {
+ CSCHED_STAT_CRANK(acct_vcpu_active);
+ svc->state_active++;
+
+ sdom->active_vcpu_count++;
+ list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+ if ( list_empty(&sdom->active_sdom_elem) )
+ {
+ list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+ csched_priv.weight += sdom->weight;
+ }
+ }
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+ }
+}
+
+static inline void
+__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
+{
+ struct csched_dom * const sdom = svc->sdom;
+
+ BUG_ON( list_empty(&svc->active_vcpu_elem) );
+
+ CSCHED_STAT_CRANK(acct_vcpu_idle);
+ svc->state_idle++;
+
+ sdom->active_vcpu_count--;
+ list_del_init(&svc->active_vcpu_elem);
+ if ( list_empty(&sdom->active_vcpu) )
+ {
+ BUG_ON( csched_priv.weight < sdom->weight );
+ list_del_init(&sdom->active_sdom_elem);
+ csched_priv.weight -= sdom->weight;
+ }
+
+ atomic_set(&svc->credit, 0);
+}
+
+static int
+csched_vcpu_alloc(struct vcpu *vc)
+{
+ struct domain * const dom = vc->domain;
+ struct csched_dom *sdom;
+ struct csched_vcpu *svc;
+ int16_t pri;
+
+ CSCHED_STAT_CRANK(vcpu_alloc);
+
+ /* Allocate, if appropriate, per-domain info */
+ if ( is_idle_vcpu(vc) )
+ {
+ sdom = NULL;
+ pri = CSCHED_PRI_IDLE;
+ }
+ else if ( CSCHED_DOM(dom) )
+ {
+ sdom = CSCHED_DOM(dom);
+ pri = CSCHED_PRI_TS_UNDER;
+ }
+ else
+ {
+ sdom = xmalloc(struct csched_dom);
+ if ( !sdom )
+ return -1;
+
+ /* Initialize credit and weight */
+ INIT_LIST_HEAD(&sdom->active_vcpu);
+ sdom->active_vcpu_count = 0;
+ INIT_LIST_HEAD(&sdom->active_sdom_elem);
+ sdom->dom = dom;
+ sdom->weight = CSCHED_DEFAULT_WEIGHT;
+ sdom->cap = 0U;
+ dom->sched_priv = sdom;
+ pri = CSCHED_PRI_TS_UNDER;
+ }
+
+ /* Allocate per-VCPU info */
+ svc = xmalloc(struct csched_vcpu);
+ if ( !svc )
+ return -1;
+
+ INIT_LIST_HEAD(&svc->runq_elem);
+ INIT_LIST_HEAD(&svc->active_vcpu_elem);
+ svc->sdom = sdom;
+ svc->vcpu = vc;
+ atomic_set(&svc->credit, 0);
+ svc->credit_last = 0;
+ svc->credit_incr = 0U;
+ svc->state_active = 0U;
+ svc->state_idle = 0U;
+ svc->pri = pri;
+ vc->sched_priv = svc;
+
+ CSCHED_VCPU_CHECK(vc);
+
+ /* Attach fair-share VCPUs to the accounting list */
+ if ( likely(sdom != NULL) )
+ csched_vcpu_acct(svc, 0);
+
+ return 0;
+}
+
+static void
+csched_vcpu_add(struct vcpu *vc)
+{
+ CSCHED_STAT_CRANK(vcpu_add);
+
+ /* Allocate per-PCPU info */
+ if ( unlikely(!CSCHED_PCPU(vc->processor)) )
+ csched_pcpu_init(vc->processor);
+
+ CSCHED_VCPU_CHECK(vc);
+}
+
+static void
+csched_vcpu_free(struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+ struct csched_dom * const sdom = svc->sdom;
+ unsigned long flags;
+
+ BUG_ON( sdom == NULL );
+ BUG_ON( !list_empty(&svc->runq_elem) );
+
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ if ( !list_empty(&svc->active_vcpu_elem) )
+ __csched_vcpu_acct_idle_locked(svc);
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+ xfree(svc);
+}
+
+static void
+csched_vcpu_sleep(struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+
+ CSCHED_STAT_CRANK(vcpu_sleep);
+
+ BUG_ON( is_idle_vcpu(vc) );
+
+ if ( schedule_data[vc->processor].curr == vc )
+ cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
+ else if ( __vcpu_on_runq(svc) )
+ __runq_remove(svc);
+}
+
+static void
+csched_vcpu_wake(struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+ const unsigned int cpu = vc->processor;
+
+ BUG_ON( is_idle_vcpu(vc) );
+
+ if ( unlikely(schedule_data[cpu].curr == vc) )
+ {
+ CSCHED_STAT_CRANK(vcpu_wake_running);
+ return;
+ }
+ if ( unlikely(__vcpu_on_runq(svc)) )
+ {
+ CSCHED_STAT_CRANK(vcpu_wake_onrunq);
+ return;
+ }
+
+ if ( likely(vcpu_runnable(vc)) )
+ CSCHED_STAT_CRANK(vcpu_wake_runnable);
+ else
+ CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
+
+ /* Put the VCPU on the runq and tickle CPUs */
+ __runq_insert(cpu, svc);
+ __runq_tickle(cpu, svc);
+}
+
+static int
+csched_vcpu_set_affinity(struct vcpu *vc, cpumask_t *affinity)
+{
+ unsigned long flags;
+ int lcpu;
+
+ if ( vc == current )
+ {
+ /* No locking needed but also can't move on the spot... */
+ if ( !cpu_isset(vc->processor, *affinity) )
+ return -EBUSY;
+
+ vc->cpu_affinity = *affinity;
+ }
+ else
+ {
+ /* Pause, modify, and unpause. */
+ vcpu_pause(vc);
+
+ vc->cpu_affinity = *affinity;
+ if ( !cpu_isset(vc->processor, vc->cpu_affinity) )
+ {
+ /*
+ * We must grab the scheduler lock for the CPU currently owning
+ * this VCPU before changing its ownership.
+ */
+ vcpu_schedule_lock_irqsave(vc, flags);
+ lcpu = vc->processor;
+
+ vc->processor = first_cpu(vc->cpu_affinity);
+
+ spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags);
+ }
+
+ vcpu_unpause(vc);
+ }
+
+ return 0;
+}
+
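+/*
+ * Get or set a domain's scheduling parameters. Weights are relative: a
+ * domain with twice the weight of another earns twice the credit per
+ * accounting period (see csched_acct). A cap value of (uint16_t)~0U is
+ * treated as "no change"; judging by the conversion in csched_acct, the
+ * cap is expressed as a percentage of one physical CPU.
+ */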
+static int
+csched_dom_cntl(
+ struct domain *d,
+ struct sched_adjdom_cmd *cmd)
+{
+ struct csched_dom * const sdom = CSCHED_DOM(d);
+ unsigned long flags;
+
+ if ( cmd->direction == SCHED_INFO_GET )
+ {
+ cmd->u.credit.weight = sdom->weight;
+ cmd->u.credit.cap = sdom->cap;
+ }
+ else
+ {
+ ASSERT( cmd->direction == SCHED_INFO_PUT );
+
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ if ( cmd->u.credit.weight != 0 )
+ {
+ csched_priv.weight -= sdom->weight;
+ sdom->weight = cmd->u.credit.weight;
+ csched_priv.weight += sdom->weight;
+ }
+
+ if ( cmd->u.credit.cap != (uint16_t)~0U )
+ sdom->cap = cmd->u.credit.cap;
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+ }
+
+ return 0;
+}
+
+static void
+csched_dom_free(struct domain *dom)
+{
+ struct csched_dom * const sdom = CSCHED_DOM(dom);
+ int i;
+
+ CSCHED_STAT_CRANK(dom_free);
+
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ {
+ if ( dom->vcpu[i] )
+ csched_vcpu_free(dom->vcpu[i]);
+ }
+
+ xfree(sdom);
+}
+
+/*
+ * This is an O(n) optimized sort of the runq.
+ *
+ * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
+ * through the runq and move up any UNDERs that are preceded by OVERs. We
+ * remember the last UNDER to make the move-up operation O(1).
+ */
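+/*
+ * Example: a runq of [U1, O1, U2, O2] is rearranged to [U1, U2, O1, O2];
+ * the relative order within each priority class is preserved.
+ */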
+static void
+csched_runq_sort(unsigned int cpu)
+{
+ struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
+ struct list_head *runq, *elem, *next, *last_under;
+ struct csched_vcpu *svc_elem;
+ unsigned long flags;
+ int sort_epoch;
+
+ sort_epoch = csched_priv.runq_sort;
+ if ( sort_epoch == spc->runq_sort_last )
+ return;
+
+ spc->runq_sort_last = sort_epoch;
+
+ spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+
+ runq = &spc->runq;
+ elem = runq->next;
+ last_under = runq;
+
+ while ( elem != runq )
+ {
+ next = elem->next;
+ svc_elem = __runq_elem(elem);
+
+ if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
+ {
+ /* does elem need to move up the runq? */
+ if ( elem->prev != last_under )
+ {
+ list_del(elem);
+ list_add(elem, last_under);
+ }
+ last_under = elem;
+ }
+
+ elem = next;
+ }
+
+ spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
+}
+
+static void
+csched_acct(void)
+{
+ unsigned long flags;
+ struct list_head *iter_vcpu, *next_vcpu;
+ struct list_head *iter_sdom, *next_sdom;
+ struct csched_vcpu *svc;
+ struct csched_dom *sdom;
+ uint32_t credit_total;
+ uint32_t weight_total;
+ uint32_t weight_left;
+ uint32_t credit_fair;
+ uint32_t credit_peak;
+ int credit_balance;
+ int credit_xtra;
+ int credit;
+
+
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ weight_total = csched_priv.weight;
+ credit_total = csched_priv.credit;
+
+ /* Converge balance towards 0 when it drops negative */
+ if ( csched_priv.credit_balance < 0 )
+ {
+ credit_total -= csched_priv.credit_balance;
+ CSCHED_STAT_CRANK(acct_balance);
+ }
+
+ if ( unlikely(weight_total == 0) )
+ {
+ csched_priv.credit_balance = 0;
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+ CSCHED_STAT_CRANK(acct_no_work);
+ return;
+ }
+
+ CSCHED_STAT_CRANK(acct_run);
+
+ weight_left = weight_total;
+ credit_balance = 0;
+ credit_xtra = 0;
+
+ list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+ {
+ sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+ BUG_ON( is_idle_domain(sdom->dom) );
+ BUG_ON( sdom->active_vcpu_count == 0 );
+ BUG_ON( sdom->weight == 0 );
+ BUG_ON( sdom->weight > weight_left );
+
+ weight_left -= sdom->weight;
+
+ /*
+ * A domain's fair share is computed using its weight in competition
+ * with that of all other active domains.
+ *
+ * At most, a domain can use credits to run all its active VCPUs
+ * for one full accounting period. We allow a domain to earn more
+ * only when the system-wide credit balance is negative.
+ */
+ credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD;
+ if ( csched_priv.credit_balance < 0 )
+ {
+ credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+ (weight_total - 1)
+ ) / weight_total;
+ }
+ if ( sdom->cap != 0U )
+ {
+ uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 100;
+ if ( credit_cap < credit_peak )
+ credit_peak = credit_cap;
+ }
+
+ credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1)
+ ) / weight_total;
+
+ if ( credit_fair < credit_peak )
+ {
+ credit_xtra = 1;
+ }
+ else
+ {
+ if ( weight_left != 0U )
+ {
+ /* Give other domains a chance at unused credits */
+ credit_total += ( ( ( credit_fair - credit_peak
+ ) * weight_total
+ ) + ( weight_left - 1 )
+ ) / weight_left;
+ }
+
+ if ( credit_xtra )
+ {
+ /*
+ * Lazily keep domains with extra credits at the head of
+ * the queue to give others a chance at them in future
+ * accounting periods.
+ */
+ CSCHED_STAT_CRANK(acct_reorder);
+ list_del(&sdom->active_sdom_elem);
+ list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+ }
+
+ credit_fair = credit_peak;
+ }
+
+ /* Compute fair share per VCPU */
+ credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
+ ) / sdom->active_vcpu_count;
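+ /*
+ * Note: the "+ (active_vcpu_count - 1)" term above (and the similar
+ * terms earlier in this function) implement ceiling division, so each
+ * share is rounded up rather than silently truncated.
+ */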
+
+
+ list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+ {
+ svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+ BUG_ON( sdom != svc->sdom );
+
+ /* Increment credit */
+ atomic_add(credit_fair, &svc->credit);
+ credit = atomic_read(&svc->credit);
+
+ /*
+ * Recompute priority or, if VCPU is idling, remove it from
+ * the active list.
+ */
+ if ( credit < 0 )
+ {
+ if ( sdom->cap == 0U )
+ svc->pri = CSCHED_PRI_TS_OVER;
+ else
+ svc->pri = CSCHED_PRI_TS_PARKED;
+
+ if ( credit < -CSCHED_TSLICE )
+ {
+ CSCHED_STAT_CRANK(acct_min_credit);
+ credit = -CSCHED_TSLICE;
+ atomic_set(&svc->credit, credit);
+ }
+ }
+ else
+ {
+ svc->pri = CSCHED_PRI_TS_UNDER;
+
+ if ( credit > CSCHED_TSLICE )
+ __csched_vcpu_acct_idle_locked(svc);
+ }
+
+ svc->credit_last = credit;
+ svc->credit_incr = credit_fair;
+ credit_balance += credit;
+ }
+ }
+
+ csched_priv.credit_balance = credit_balance;
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+ /* Inform each CPU that its runq needs to be sorted */
+ csched_priv.runq_sort++;
+}
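+
+/*
+ * Worked example (illustrative, not from the original patch): with two
+ * active domains of weights 256 and 512, credit_total = 300, and neither
+ * domain limited by its cap or credit_peak, the fair shares come out to
+ * 300*256/768 = 100 and 300*512/768 = 200 credits for this accounting
+ * period, i.e. credit is handed out in proportion to weight.
+ */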
+
+static void
+csched_tick(unsigned int cpu)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(current);
+ struct csched_dom * const sdom = svc->sdom;
+
+ /*
+ * Accounting for running VCPU
+ *
+ * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
+ */
+ if ( likely(sdom != NULL) )
+ {
+ csched_vcpu_acct(svc, CSCHED_TICK);
+ }
+
+ /*
+ * Accounting duty
+ *
+ * Note: Currently, this is always done by the master boot CPU. Eventually,
+ * we could distribute or at the very least cycle the duty.
+ */
+ if ( (csched_priv.master == cpu) &&
+ (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 )
+ {
+ csched_acct();
+ }
+
+ /*
+ * Check if runq needs to be sorted
+ *
+ * Every physical CPU re-sorts the runq after the accounting master has
+ * modified priorities. This is a special O(n) sort and runs at most
+ * once per accounting period (currently 30 milliseconds).
+ */
+ csched_runq_sort(cpu);
+}
+
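+/*
+ * Scan a peer PCPU's runq for a VCPU of strictly higher priority that is
+ * allowed to run on our PCPU. The runq is kept sorted by priority, so the
+ * scan can stop at the first element that is no better than what we
+ * already have.
+ */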
+static struct csched_vcpu *
+csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
+{
+ struct list_head *iter;
+ struct csched_vcpu *speer;
+ struct vcpu *vc;
+
+ list_for_each( iter, &spc->runq )
+ {
+ speer = __runq_elem(iter);
+
+ /*
+ * If next available VCPU here is not of higher priority than ours,
+ * this PCPU is useless to us.
+ */
+ if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
+ {
+ CSCHED_STAT_CRANK(steal_peer_idle);
+ break;
+ }
+
+ /* Is this VCPU runnable on our PCPU? */
+ vc = speer->vcpu;
+ BUG_ON( is_idle_vcpu(vc) );
+
+ if ( __csched_vcpu_is_stealable(cpu, vc) )
+ {
+ /* We got a candidate. Grab it! */
+ __runq_remove(speer);
+ vc->processor = cpu;
+
+ return speer;
+ }
+ }
+
+ return NULL;
+}
+
+static struct csched_vcpu *
+csched_load_balance(int cpu, struct csched_vcpu *snext)
+{
+ struct csched_pcpu *spc;
+ struct csched_vcpu *speer;
+ int peer_cpu;
+
+ if ( snext->pri == CSCHED_PRI_IDLE )
+ CSCHED_STAT_CRANK(load_balance_idle);
+ else if ( snext->pri == CSCHED_PRI_TS_OVER )
+ CSCHED_STAT_CRANK(load_balance_over);
+ else
+ CSCHED_STAT_CRANK(load_balance_other);
+
+ peer_cpu = cpu;
+ BUG_ON( peer_cpu != snext->vcpu->processor );
+
+ while ( 1 )
+ {
+ /* For each PCPU in the system starting with our neighbour... */
+ peer_cpu = (peer_cpu + 1) % csched_priv.ncpus;
+ if ( peer_cpu == cpu )
+ break;
+
+ BUG_ON( peer_cpu >= csched_priv.ncpus );
+ BUG_ON( peer_cpu == cpu );
+
+ /*
+ * Get ahold of the scheduler lock for this peer CPU.
+ *
+ * Note: We don't spin on this lock but simply try it. Spinning could
+ * cause a deadlock if the peer CPU is also load balancing and trying
+ * to lock this CPU.
+ */
+ if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) )
+ {
+
+ spc = CSCHED_PCPU(peer_cpu);
+ if ( unlikely(spc == NULL) )
+ {
+ CSCHED_STAT_CRANK(steal_peer_down);
+ speer = NULL;
+ }
+ else
+ {
+ speer = csched_runq_steal(spc, cpu, snext->pri);
+ }
+
+ spin_unlock(&schedule_data[peer_cpu].schedule_lock);
+
+ /* Got one! */
+ if ( speer )
+ {
+ CSCHED_STAT_CRANK(vcpu_migrate);
+ return speer;
+ }
+ }
+ else
+ {
+ CSCHED_STAT_CRANK(steal_trylock_failed);
+ }
+ }
+
+
+ /* Failed to find more important work */
+ __runq_remove(snext);
+ return snext;
+}
+
+/*
+ * This function is in the critical path. It is designed to be simple and
+ * fast for the common case.
+ */
+static struct task_slice
+csched_schedule(s_time_t now)
+{
+ const int cpu = smp_processor_id();
+ struct list_head * const runq = RUNQ(cpu);
+ struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+ struct csched_vcpu *snext;
+ struct task_slice ret;
+
+ CSCHED_STAT_CRANK(schedule);
+ CSCHED_VCPU_CHECK(current);
+
+ /*
+ * Select next runnable local VCPU (i.e. top of local runq)
+ */
+ if ( vcpu_runnable(current) )
+ __runq_insert(cpu, scurr);
+ else
+ BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
+
+ snext = __runq_elem(runq->next);
+
+ /*
+ * SMP Load balance:
+ *
+ * If the next highest priority local runnable VCPU has already eaten
+ * through its credits, look on other PCPUs to see if we have more
+ * urgent work... If not, csched_load_balance() will return snext, but
+ * already removed from the runq.
+ */
+ if ( snext->pri > CSCHED_PRI_TS_OVER )
+ __runq_remove(snext);
+ else
+ snext = csched_load_balance(cpu, snext);
+
+ /*
+ * Update idlers mask if necessary. When we're idling, other CPUs
+ * will tickle us when they get extra work.
+ */
+ if ( snext->pri == CSCHED_PRI_IDLE )
+ {
+ if ( !cpu_isset(cpu, csched_priv.idlers) )
+ cpu_set(cpu, csched_priv.idlers);
+ }
+ else if ( cpu_isset(cpu, csched_priv.idlers) )
+ {
+ cpu_clear(cpu, csched_priv.idlers);
+ }
+
+ /*
+ * Return task to run next...
+ */
+ ret.time = MILLISECS(CSCHED_TSLICE);
+ ret.task = snext->vcpu;
+
+ CSCHED_VCPU_CHECK(ret.task);
+ BUG_ON( !vcpu_runnable(ret.task) );
+
+ return ret;
+}
+
+static void
+csched_dump_vcpu(struct csched_vcpu *svc)
+{
+ struct csched_dom * const sdom = svc->sdom;
+
+ printk("[%i.%i] pri=%i cpu=%i",
+ svc->vcpu->domain->domain_id,
+ svc->vcpu->vcpu_id,
+ svc->pri,
+ svc->vcpu->processor);
+
+ if ( sdom )
+ {
+ printk(" credit=%i (%d+%u) {a=%u i=%u w=%u}",
+ atomic_read(&svc->credit),
+ svc->credit_last,
+ svc->credit_incr,
+ svc->state_active,
+ svc->state_idle,
+ sdom->weight);
+ }
+
+ printk("\n");
+}
+
+static void
+csched_dump_pcpu(int cpu)
+{
+ struct list_head *runq, *iter;
+ struct csched_pcpu *spc;
+ struct csched_vcpu *svc;
+ int loop;
+
+ spc = CSCHED_PCPU(cpu);
+ runq = &spc->runq;
+
+ printk(" tick=%lu, sort=%d\n",
+ schedule_data[cpu].tick,
+ spc->runq_sort_last);
+
+ /* current VCPU */
+ svc = CSCHED_VCPU(schedule_data[cpu].curr);
+ if ( svc )
+ {
+ printk("\trun: ");
+ csched_dump_vcpu(svc);
+ }
+
+ loop = 0;
+ list_for_each( iter, runq )
+ {
+ svc = __runq_elem(iter);
+ if ( svc )
+ {
+ printk("\t%3d: ", ++loop);
+ csched_dump_vcpu(svc);
+ }
+ }
+}
+
+static void
+csched_dump(void)
+{
+ struct list_head *iter_sdom, *iter_svc;
+ int loop;
+
+ printk("info:\n"
+ "\tncpus = %u\n"
+ "\tmaster = %u\n"
+ "\tcredit = %u\n"
+ "\tcredit balance = %d\n"
+ "\tweight = %u\n"
+ "\trunq_sort = %u\n"
+ "\ttick = %dms\n"
+ "\ttslice = %dms\n"
+ "\taccounting period = %dms\n"
+ "\tdefault-weight = %d\n",
+ csched_priv.ncpus,
+ csched_priv.master,
+ csched_priv.credit,
+ csched_priv.credit_balance,
+ csched_priv.weight,
+ csched_priv.runq_sort,
+ CSCHED_TICK,
+ CSCHED_TSLICE,
+ CSCHED_ACCT_PERIOD,
+ CSCHED_DEFAULT_WEIGHT);
+
+ printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]);
+
+ CSCHED_STATS_PRINTK();
+
+ printk("active vcpus:\n");
+ loop = 0;
+ list_for_each( iter_sdom, &csched_priv.active_sdom )
+ {
+ struct csched_dom *sdom;
+ sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+ list_for_each( iter_svc, &sdom->active_vcpu )
+ {
+ struct csched_vcpu *svc;
+ svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
+
+ printk("\t%3d: ", ++loop);
+ csched_dump_vcpu(svc);
+ }
+ }
+}
+
+static void
+csched_init(void)
+{
+ spin_lock_init(&csched_priv.lock);
+ INIT_LIST_HEAD(&csched_priv.active_sdom);
+ csched_priv.ncpus = 0;
+ csched_priv.master = UINT_MAX;
+ cpus_clear(csched_priv.idlers);
+ csched_priv.weight = 0U;
+ csched_priv.credit = 0U;
+ csched_priv.credit_balance = 0;
+ csched_priv.runq_sort = 0U;
+ CSCHED_STATS_RESET();
+}
+
+
+struct scheduler sched_credit_def = {
+ .name = "SMP Credit Scheduler",
+ .opt_name = "credit",
+ .sched_id = SCHED_CREDIT,
+
+ .alloc_task = csched_vcpu_alloc,
+ .add_task = csched_vcpu_add,
+ .sleep = csched_vcpu_sleep,
+ .wake = csched_vcpu_wake,
+ .set_affinity = csched_vcpu_set_affinity,
+
+ .adjdom = csched_dom_cntl,
+ .free_task = csched_dom_free,
+
+ .tick = csched_tick,
+ .do_schedule = csched_schedule,
+
+ .dump_cpu_state = csched_dump_pcpu,
+ .dump_settings = csched_dump,
+ .init = csched_init,
+};
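+
+/*
+ * Usage note (illustrative, beyond this file): the scheduler is selected
+ * by the option name above ("credit"), and per-domain weight/cap changes
+ * reach csched_dom_cntl() through the .adjdom hook, i.e. via a struct
+ * sched_adjdom_cmd with direction SCHED_INFO_PUT.
+ */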
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
--- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c Thu May 25 15:59:18 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/if_ether.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <xen/net_driver_util.h>
-
-
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-{
- char *s;
- int i;
- char *e;
- char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
- if (IS_ERR(macstr))
- return PTR_ERR(macstr);
- s = macstr;
- for (i = 0; i < ETH_ALEN; i++) {
- mac[i] = simple_strtoul(s, &e, 16);
- if (s == e || (e[0] != ':' && e[0] != 0)) {
- kfree(macstr);
- return -ENOENT;
- }
- s = &e[1];
- }
- kfree(macstr);
- return 0;
-}
-EXPORT_SYMBOL_GPL(xen_net_read_mac);
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/asm-x86_64/e820.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/e820.h Thu May 25 15:59:18 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-/*
- * structures and definitions for the int 15, ax=e820 memory map
- * scheme.
- *
- * In a nutshell, setup.S populates a scratch table in the
- * empty_zero_block that contains a list of usable address/size
- * duples. In setup.c, this information is transferred into the e820map,
- * and in init.c/numa.c, that new information is used to mark pages
- * reserved or not.
- */
-#ifndef __E820_HEADER
-#define __E820_HEADER
-
-#include <linux/mmzone.h>
-
-#define E820MAP 0x2d0 /* our map */
-#define E820MAX 128 /* number of entries in E820MAP */
-#define E820NR 0x1e8 /* # entries in E820MAP */
-
-#define E820_RAM 1
-#define E820_RESERVED 2
-#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */
-#define E820_NVS 4
-
-#define HIGH_MEMORY (1024*1024)
-
-#define LOWMEMSIZE() (0x9f000)
-
-#ifndef __ASSEMBLY__
-struct e820entry {
- u64 addr; /* start of memory segment */
- u64 size; /* size of memory segment */
- u32 type; /* type of memory segment */
-} __attribute__((packed));
-
-struct e820map {
- int nr_map;
- struct e820entry map[E820MAX];
-};
-
-extern unsigned long find_e820_area(unsigned long start, unsigned long end,
- unsigned size);
-extern void add_memory_region(unsigned long start, unsigned long size,
- int type);
-extern void setup_memory_region(void);
-extern void contig_e820_setup(void);
-extern unsigned long e820_end_of_ram(void);
-extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
-extern void e820_print_map(char *who);
-extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
-
-extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
-extern void e820_setup_gap(struct e820entry *e820, int nr_map);
-extern unsigned long e820_hole_size(unsigned long start_pfn,
- unsigned long end_pfn);
-
-extern void __init parse_memopt(char *p, char **end);
-extern void __init parse_memmapopt(char *p, char **end);
-
-extern struct e820map e820;
-#endif/*!__ASSEMBLY__*/
-
-#endif/*__E820_HEADER*/
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/xen/net_driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/net_driver_util.h Thu May 25 15:59:18 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _ASM_XEN_NET_DRIVER_UTIL_H
-#define _ASM_XEN_NET_DRIVER_UTIL_H
-
-
-#include <xen/xenbus.h>
-
-
-/**
- * Read the 'mac' node at the given device's node in the store, and parse that
- * as colon-separated octets, placing the result in the given mac array. mac must be
- * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h).
- * Return 0 on success, or -errno on error.
- */
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
-
-
-#endif /* _ASM_XEN_NET_DRIVER_UTIL_H */
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_proc.h
--- a/tools/xenstore/xenstored_proc.h Thu May 25 15:59:18 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-/*
- Copyright (C) 2005 XenSource Ltd
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-*/
-
-#ifndef _XENSTORED_PROC_H
-#define _XENSTORED_PROC_H
-
-#define XENSTORED_PROC_KVA "/proc/xen/xsd_kva"
-#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
-
-
-#endif /* _XENSTORED_PROC_H */