# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID b53a65034532e790184c7d68a350879a5c6b2eec
# Parent a4196568095c0551fa41dba7be6a57b008346b4d
# Parent 55a5ad2f028d55758c6193b4fec970ee46a60ec1
Merge in minor ia64 Makefile change from xen-unstable
diff -r a4196568095c -r b53a65034532 .hgignore
--- a/.hgignore Fri Jul 29 18:52:33 2005
+++ b/.hgignore Fri Jul 29 20:25:03 2005
@@ -87,6 +87,11 @@
^tools/cmdline/.*$
^tools/cmdline/xen/.*$
^tools/debugger/pdb/pdb$
+^tools/debugger/pdb/linux-[0-9.]*-module/.*\.ko$
+^tools/debugger/pdb/linux-[0-9.]*-module/.*\.mod.c$
+^tools/debugger/pdb/linux-[0-9.]*-module/\..*\.cmd$
+^tools/debugger/pdb/linux-[0-9.]*-module/.tmp_versions/.*$
+^tools/debugger/pdb/._bcdi/.*$
^tools/firmware/acpi/acpigen$
^tools/firmware/.*\.bin$
^tools/firmware/.*\.sym$
@@ -157,10 +162,6 @@
^xen/arch/x86/asm-offsets\.s$
^xen/arch/x86/boot/mkelf32$
^xen/ddb/.*$
-^xen/drivers/pci/classlist\.h$
-^xen/drivers/pci/devlist\.h$
-^xen/drivers/pci/gen-devlist$
-^xen/figlet/figlet$
^xen/include/asm$
^xen/include/asm-.*/asm-offsets\.h$
^xen/include/hypervisor-ifs/arch$
@@ -170,8 +171,8 @@
^xen/include/xen/banner\.h$
^xen/include/xen/compile\.h$
^xen/tags$
-^xen/tools/elf-reloc$
^xen/tools/figlet/figlet$
+^xen/tools/symbols$
^xen/xen$
^xen/xen-syms$
^xen/xen\..*$
diff -r a4196568095c -r b53a65034532 Makefile
--- a/Makefile Fri Jul 29 18:52:33 2005
+++ b/Makefile Fri Jul 29 20:25:03 2005
@@ -163,7 +163,7 @@
uninstall: DESTDIR=
uninstall: D=$(DESTDIR)
uninstall:
- [ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-$(date +%s)
+ [ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-`date +%s`
rm -rf $(D)/etc/init.d/xend*
rm -rf $(D)/usr/$(LIBDIR)/libxc* $(D)/usr/$(LIBDIR)/libxutil*
rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/include/xen
diff -r a4196568095c -r b53a65034532 buildconfigs/Rules.mk
--- a/buildconfigs/Rules.mk Fri Jul 29 18:52:33 2005
+++ b/buildconfigs/Rules.mk Fri Jul 29 20:25:03 2005
@@ -101,7 +101,7 @@
rm -rf tmp-$@
%-mrproper: %-mrproper-extra
- rm -rf pristine-$* ref-$* $*.tar.bz2
+ rm -rf pristine-$(*)* ref-$(*)* $*.tar.bz2
rm -rf $*-xen.patch
netbsd-%-mrproper-extra:
@@ -111,5 +111,12 @@
%-mrproper-extra:
@: # do nothing
+config-update-pae:
+ifeq ($(XEN_TARGET_X86_PAE),y)
+	sed -e 's!^CONFIG_HIGHMEM4G=y$$!\# CONFIG_HIGHMEM4G is not set!;s!^\# CONFIG_HIGHMEM64G is not set$$!CONFIG_HIGHMEM64G=y!' $(CONFIG_FILE) > $(CONFIG_FILE)- && mv $(CONFIG_FILE)- $(CONFIG_FILE)
+else
+ @: # do nothing yet
+endif
+
# never delete any intermediate files.
.SECONDARY:
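
[Editor's note: the new config-update-pae target above is just a sed rewrite of the
kernel .config: when XEN_TARGET_X86_PAE=y it turns off CONFIG_HIGHMEM4G and turns on
CONFIG_HIGHMEM64G, which PAE requires. A stand-alone sketch of the same transformation,
using a hypothetical .config path in place of the CONFIG_FILE variable that the real
target is passed by the buildconfigs/mk.linux-2.6-* fragments:

  #!/bin/sh
  # Sketch of the config-update-pae rewrite (path below is an example only).
  CONFIG_FILE=linux-2.6.12-xen0/.config

  # Disable 4GB highmem, enable 64GB (PAE) highmem, then replace the file.
  sed -e 's!^CONFIG_HIGHMEM4G=y$!# CONFIG_HIGHMEM4G is not set!' \
      -e 's!^# CONFIG_HIGHMEM64G is not set$!CONFIG_HIGHMEM64G=y!' \
      "$CONFIG_FILE" > "$CONFIG_FILE"- && mv "$CONFIG_FILE"- "$CONFIG_FILE"
]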
diff -r a4196568095c -r b53a65034532 buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen Fri Jul 29 18:52:33 2005
+++ b/buildconfigs/mk.linux-2.6-xen Fri Jul 29 20:25:03 2005
@@ -32,6 +32,8 @@
	cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \
	  || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \
	  $(LINUX_DIR)/.config
+ # See if we need to munge config to enable PAE
+	$(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae
# Patch kernel Makefile to set EXTRAVERSION
( cd $(LINUX_DIR) ; \
	  sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \
diff -r a4196568095c -r b53a65034532 buildconfigs/mk.linux-2.6-xen0
--- a/buildconfigs/mk.linux-2.6-xen0 Fri Jul 29 18:52:33 2005
+++ b/buildconfigs/mk.linux-2.6-xen0 Fri Jul 29 20:25:03 2005
@@ -32,6 +32,8 @@
	cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \
	  || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \
	  $(LINUX_DIR)/.config
+ # See if we need to munge config to enable PAE
+	$(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae
# Patch kernel Makefile to set EXTRAVERSION
( cd $(LINUX_DIR) ; \
	  sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \
diff -r a4196568095c -r b53a65034532 buildconfigs/mk.linux-2.6-xenU
--- a/buildconfigs/mk.linux-2.6-xenU Fri Jul 29 18:52:33 2005
+++ b/buildconfigs/mk.linux-2.6-xenU Fri Jul 29 20:25:03 2005
@@ -32,6 +32,8 @@
	cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \
	  || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \
	  $(LINUX_DIR)/.config
+ # See if we need to munge config to enable PAE
+	$(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae
# Patch kernel Makefile to set EXTRAVERSION
( cd $(LINUX_DIR) ; \
	  sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \
diff -r a4196568095c -r b53a65034532 docs/misc/shype4xen_readme.txt
--- a/docs/misc/shype4xen_readme.txt Fri Jul 29 18:52:33 2005
+++ b/docs/misc/shype4xen_readme.txt Fri Jul 29 20:25:03 2005
@@ -567,4 +567,22 @@
Our policy interface enables managers to create a single binary policy file in
a trusted
environment and distributed it to multiple systems for enforcement.
+5. Booting with a binary policy:
+********************************
+The grub configuration file can be adapted to boot the hypervisor with an
+already active policy. To do this, a binary policy file - this can be
+the same file as used by the policy_tool - should be placed into the boot
+partition. The following entry from the grub configuration file shows how
+a binary policy can be added to the system during boot time. Note that the
+binary policy must be of the same type that the hypervisor was compiled
+for. The policy module line must be the last module line, and it should only
+be added if XEN was compiled with the access control module (ACM).
+
+title XEN0 3.0 Devel
+ kernel /xen.gz dom0_mem=400000
+ module /vmlinuz-2.6.12-xen0 root=/dev/hda2 ro console=tty0
+ module /initrd-2.6.12-xen0.img
+ module /xen_sample_policy.bin
+
+
====================end-of file=======================================
diff -r a4196568095c -r b53a65034532 docs/src/user.tex
--- a/docs/src/user.tex Fri Jul 29 18:52:33 2005
+++ b/docs/src/user.tex Fri Jul 29 20:25:03 2005
@@ -930,12 +930,12 @@
\subsection{Setting memory footprints from dom0}
The machine administrator can request that a domain alter its memory
-footprint using the \path{xm balloon} command. For instance, we can
+footprint using the \path{xm set-mem} command. For instance, we can
request that our example ttylinux domain reduce its memory footprint
to 32 megabytes.
\begin{verbatim}
-# xm balloon ttylinux 32
+# xm set-mem ttylinux 32
\end{verbatim}
We can now see the result of this in the output of \path{xm list}:
@@ -951,16 +951,16 @@
can restore the domain to its original size using the command line:
\begin{verbatim}
-# xm balloon ttylinux 64
+# xm set-mem ttylinux 64
\end{verbatim}
\subsection{Setting memory footprints from within a domain}
-The virtual file \path{/proc/xen/memory\_target} allows the owner of a
+The virtual file \path{/proc/xen/balloon} allows the owner of a
domain to adjust their own memory footprint. Reading the file
-(e.g. \path{cat /proc/xen/memory\_target}) prints out the current
+(e.g. \path{cat /proc/xen/balloon}) prints out the current
memory footprint of the domain. Writing the file
-(e.g. \path{echo new\_target > /proc/xen/memory\_target}) requests
+(e.g. \path{echo new\_target > /proc/xen/balloon}) requests
that the kernel adjust the domain's memory footprint to a new value.
\subsection{Setting memory limits}
@@ -1285,7 +1285,7 @@
The available commands are as follows:
\begin{description}
-\item[balloon] Request a domain to adjust its memory footprint.
+\item[set-mem] Request a domain to adjust its memory footprint.
\item[create] Create a new domain.
\item[destroy] Kill a domain immediately.
\item[list] List running domains.
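
[Editor's note: the user.tex hunks above track the rename of the ballooning interfaces
(xm balloon becomes xm set-mem, /proc/xen/memory_target becomes /proc/xen/balloon).
Pulled together, the updated commands from the manual's ttylinux example read as
follows (new_target is left symbolic, as in the text):

  # From domain 0: shrink the example domain to 32MB, check, then restore it.
  xm set-mem ttylinux 32
  xm list
  xm set-mem ttylinux 64

  # From inside a guest: read or request a change to the domain's own footprint.
  cat /proc/xen/balloon
  echo new_target > /proc/xen/balloon
]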
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig Fri Jul 29 20:25:03 2005
@@ -96,6 +96,20 @@
network interfaces within another guest OS. Unless you are building a
dedicated device-driver domain, or your master control domain
(domain 0), then you almost certainly want to say Y here.
+
+config XEN_NETDEV_GRANT_TX
+ bool "Grant table substrate for net drivers tx path (DANGEROUS)"
+ default n
+ help
+	  This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend network drivers.
+
+config XEN_NETDEV_GRANT_RX
+ bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+ default n
+ help
+	  This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend network drivers.
config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
bool "Pipelined transmitter (DANGEROUS)"
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Fri Jul 29 20:25:03 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.12-xen0
-# Sat Jul 9 09:19:47 2005
+# Mon Jul 25 09:48:34 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -18,6 +18,8 @@
CONFIG_XEN_NETDEV_BACKEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_GRANT_TX is not set
+# CONFIG_XEN_NETDEV_GRANT_RX is not set
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
@@ -176,38 +178,12 @@
# PCI Hotplug Support
#
# CONFIG_HOTPLUG_PCI is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_PREEMPT is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_HIGHMEM is not set
-CONFIG_DEBUG_BUGVERBOSE=y
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_FRAME_POINTER is not set
-CONFIG_EARLY_PRINTK=y
-# CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
-# CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-# CONFIG_4KSTACKS is not set
-CONFIG_X86_FIND_SMP_CONFIG=y
-CONFIG_X86_MPPARSE=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_PC=y
CONFIG_SECCOMP=y
+CONFIG_EARLY_PRINTK=y
#
# Executable file formats
@@ -1274,3 +1250,29 @@
CONFIG_CRC32=y
CONFIG_LIBCRC32C=y
CONFIG_ZLIB_INFLATE=y
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Fri Jul 29 20:25:03 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.12-xenU
-# Sun Jul 10 17:32:04 2005
+# Mon Jul 25 10:06:06 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -15,6 +15,8 @@
CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_GRANT_TX is not set
+# CONFIG_XEN_NETDEV_GRANT_RX is not set
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Fri Jul 29 20:25:03 2005
@@ -19,7 +19,7 @@
s-obj-y :=
obj-y += cpu/
-obj-y += timers/
+#obj-y += timers/
obj-$(CONFIG_ACPI_BOOT) += acpi/
#c-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
c-obj-$(CONFIG_MCA) += mca.o
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Fri Jul 29 20:25:03 2005
@@ -613,8 +613,6 @@
xorl %edx,%edx # error code 0
movl %esp,%eax # pt_regs pointer
call do_debug
- testl %eax,%eax
- jnz restore_all
jmp ret_from_exception
#if 0 /* XEN */
@@ -669,8 +667,6 @@
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
- testl %eax,%eax
- jnz restore_all
jmp ret_from_exception
ENTRY(overflow)
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Fri Jul 29 20:25:03 2005
@@ -9,6 +9,9 @@
.ascii ",PAE=yes"
#else
.ascii ",PAE=no"
+#endif
+#ifdef CONFIG_XEN_SHADOW_MODE
+ .ascii ",SHADOW=translate"
#endif
.ascii ",LOADER=generic"
.byte 0
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c Fri Jul 29 20:25:03 2005
@@ -263,7 +263,6 @@
void fixup_irqs(cpumask_t map)
{
unsigned int irq;
- static int warned;
for (irq = 0; irq < NR_IRQS; irq++) {
cpumask_t mask;
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c	Fri Jul 29 20:25:03 2005
@@ -748,8 +748,10 @@
|| (mpf->mpf_specification == 4)) ) {
smp_found_config = 1;
+#ifndef CONFIG_XEN
printk(KERN_INFO "found SMP MP-table at %08lx\n",
virt_to_phys(mpf));
+ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
if (mpf->mpf_physptr) {
/*
* We cannot access to MPC table to compute
@@ -766,6 +768,10 @@
size = end - mpf->mpf_physptr;
reserve_bootmem(mpf->mpf_physptr, size);
}
+#else
+ printk(KERN_INFO "found SMP MP-table at %08lx\n",
+			((unsigned long)bp - (unsigned long)isa_bus_to_virt(base)) + base);
+#endif
mpf_found = mpf;
return 1;
@@ -809,9 +815,11 @@
* MP1.4 SPEC states to only scan first 1K of 4K EBDA.
*/
+#ifndef CONFIG_XEN
address = get_bios_ebda();
if (address)
smp_scan_config(address, 0x400);
+#endif
}
/* --------------------------------------------------------------------------
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Jul 29 20:25:03 2005
@@ -1060,6 +1060,7 @@
}
}
+#ifndef CONFIG_XEN
/*
* workaround for Dell systems that neglect to reserve EBDA
*/
@@ -1070,6 +1071,7 @@
if (addr)
reserve_bootmem(addr, PAGE_SIZE);
}
+#endif
#ifndef CONFIG_DISCONTIGMEM
void __init setup_bootmem_allocator(void);
@@ -1152,6 +1154,13 @@
reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+#ifndef CONFIG_XEN
+ /*
+ * reserve physical page 0 - it's a special BIOS page on many boxes,
+ * enabling clean reboots, SMP operation, laptop functions.
+ */
+ reserve_bootmem(0, PAGE_SIZE);
+
/* reserve EBDA region, it's a 4K region */
reserve_ebda_region();
@@ -1176,6 +1185,7 @@
*/
acpi_reserve_bootmem();
#endif
+#endif /* !CONFIG_XEN */
#ifdef CONFIG_BLK_DEV_INITRD
if (xen_start_info.mod_start) {
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Fri Jul 29 20:25:03 2005
@@ -104,24 +104,16 @@
struct timer_opts *cur_timer = &timer_tsc;
/* These are peridically updated in shared_info, and then copied here. */
-u32 shadow_tsc_stamp;
-u64 shadow_system_time;
-static u32 shadow_time_version;
+struct shadow_time_info {
+ u64 tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_timestamp; /* Time, in nanosecs, since boot. */
+ u32 tsc_to_nsec_mul;
+ u32 tsc_to_usec_mul;
+ int tsc_shift;
+ u32 version;
+};
+static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
static struct timeval shadow_tv;
-
-/*
- * We use this to ensure that gettimeofday() is monotonically increasing. We
- * only break this guarantee if the wall clock jumps backwards "a long way".
- */
-static struct timeval last_seen_tv = {0,0};
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-/* Periodically propagate synchronised time base to the RTC and to Xen. */
-static long last_rtc_update, last_update_to_xen;
-#endif
-
-/* Periodically take synchronised time base from Xen, if we need it. */
-static long last_update_from_xen; /* UTC seconds when last read Xen clock. */
/* Keep track of last time we did processing/updating of jiffies and xtime. */
static u64 processed_system_time; /* System time (ns) at last processing. */
@@ -164,26 +156,149 @@
#define INDEPENDENT_WALLCLOCK() \
(independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
+int tsc_disable __initdata = 0;
+
+static void delay_tsc(unsigned long loops)
+{
+ unsigned long bclock, now;
+
+ rdtscl(bclock);
+ do
+ {
+ rep_nop();
+ rdtscl(now);
+ } while ((now-bclock) < loops);
+}
+
+struct timer_opts timer_tsc = {
+ .name = "tsc",
+ .delay = delay_tsc,
+};
+
+static inline u32 down_shift(u64 time, int shift)
+{
+ if ( shift < 0 )
+ return (u32)(time >> -shift);
+ return (u32)((u32)time << shift);
+}
+
+/*
+ * 32-bit multiplication of integer multiplicand and fractional multiplier
+ * yielding 32-bit integer product.
+ */
+static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
+{
+ u32 product_int, product_frac;
+ __asm__ (
+ "mul %3"
+ : "=a" (product_frac), "=d" (product_int)
+ : "0" (multiplicand), "r" (multiplier) );
+ return product_int;
+}
+
+void init_cpu_khz(void)
+{
+ u64 __cpu_khz = 1000000ULL << 32;
+ struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
+ do_div(__cpu_khz, info->tsc_to_system_mul);
+ cpu_khz = down_shift(__cpu_khz, -info->tsc_shift);
+ printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+}
+
+static u64 get_nsec_offset(struct shadow_time_info *shadow)
+{
+ u64 now;
+ u32 delta;
+ rdtscll(now);
+ delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
+ return mul_frac(delta, shadow->tsc_to_nsec_mul);
+}
+
+static unsigned long get_usec_offset(struct shadow_time_info *shadow)
+{
+ u64 now;
+ u32 delta;
+ rdtscll(now);
+ delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
+ return mul_frac(delta, shadow->tsc_to_usec_mul);
+}
+
+static void update_wallclock(void)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+ long wtm_nsec, xtime_nsec;
+ time_t wtm_sec, xtime_sec;
+ u64 tmp, usec;
+
+ shadow_tv.tv_sec = s->wc_sec;
+ shadow_tv.tv_usec = s->wc_usec;
+
+ if (INDEPENDENT_WALLCLOCK())
+ return;
+
+ if ((time_status & STA_UNSYNC) != 0)
+ return;
+
+ /* Adjust wall-clock time base based on wall_jiffies ticks. */
+ usec = processed_system_time;
+ do_div(usec, 1000);
+ usec += (u64)shadow_tv.tv_sec * 1000000ULL;
+ usec += (u64)shadow_tv.tv_usec;
+ usec -= (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
+
+ /* Split wallclock base into seconds and nanoseconds. */
+ tmp = usec;
+ xtime_nsec = do_div(tmp, 1000000) * 1000ULL;
+ xtime_sec = (time_t)tmp;
+
+ wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
+ wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);
+
+ set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
+ set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+}
+
/*
* Reads a consistent set of time-base values from Xen, into a shadow data
* area. Must be called with the xtime_lock held for writing.
*/
static void __get_time_values_from_xen(void)
{
- shared_info_t *s = HYPERVISOR_shared_info;
+ shared_info_t *s = HYPERVISOR_shared_info;
+ struct vcpu_time_info *src;
+ struct shadow_time_info *dst;
+
+ src = &s->vcpu_time[smp_processor_id()];
+ dst = &per_cpu(shadow_time, smp_processor_id());
do {
- shadow_time_version = s->time_version2;
+ dst->version = src->time_version2;
rmb();
- shadow_tv.tv_sec = s->wc_sec;
- shadow_tv.tv_usec = s->wc_usec;
- shadow_tsc_stamp = (u32)s->tsc_timestamp;
- shadow_system_time = s->system_time;
+ dst->tsc_timestamp = src->tsc_timestamp;
+ dst->system_timestamp = src->system_time;
+ dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
+ dst->tsc_shift = src->tsc_shift;
rmb();
}
- while (shadow_time_version != s->time_version1);
-
- cur_timer->mark_offset();
+ while (dst->version != src->time_version1);
+
+ dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
+
+ if ((shadow_tv.tv_sec != s->wc_sec) ||
+ (shadow_tv.tv_usec != s->wc_usec))
+ update_wallclock();
+}
+
+static inline int time_values_up_to_date(int cpu)
+{
+ struct vcpu_time_info *src;
+ struct shadow_time_info *dst;
+
+ src = &HYPERVISOR_shared_info->vcpu_time[smp_processor_id()];
+ dst = &per_cpu(shadow_time, smp_processor_id());
+
+ return (dst->version == src->time_version2);
}
#define TIME_VALUES_UP_TO_DATE \
@@ -229,13 +344,18 @@
unsigned long max_ntp_tick;
unsigned long flags;
s64 nsec;
+ unsigned int cpu;
+ struct shadow_time_info *shadow;
+
+ cpu = get_cpu();
+ shadow = &per_cpu(shadow_time, cpu);
do {
unsigned long lost;
seq = read_seqbegin(&xtime_lock);
- usec = cur_timer->get_offset();
+ usec = get_usec_offset(shadow);
lost = jiffies - wall_jiffies;
/*
@@ -256,11 +376,11 @@
sec = xtime.tv_sec;
usec += (xtime.tv_nsec / NSEC_PER_USEC);
- nsec = shadow_system_time - processed_system_time;
+ nsec = shadow->system_timestamp - processed_system_time;
__normalize_time(&sec, &nsec);
usec += (long)nsec / NSEC_PER_USEC;
- if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
+ if (unlikely(!time_values_up_to_date(cpu))) {
/*
* We may have blocked for a long time,
* rendering our calculations invalid
@@ -275,19 +395,11 @@
}
} while (read_seqretry(&xtime_lock, seq));
+ put_cpu();
+
while (usec >= USEC_PER_SEC) {
usec -= USEC_PER_SEC;
sec++;
- }
-
- /* Ensure that time-of-day is monotonically increasing. */
- if ((sec < last_seen_tv.tv_sec) ||
- ((sec == last_seen_tv.tv_sec) && (usec < last_seen_tv.tv_usec))) {
- sec = last_seen_tv.tv_sec;
- usec = last_seen_tv.tv_usec;
- } else {
- last_seen_tv.tv_sec = sec;
- last_seen_tv.tv_usec = usec;
}
tv->tv_sec = sec;
@@ -302,12 +414,17 @@
long wtm_nsec;
s64 nsec;
struct timespec xentime;
+ unsigned int cpu;
+ struct shadow_time_info *shadow;
if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
if (!INDEPENDENT_WALLCLOCK())
return 0; /* Silent failure? */
+
+ cpu = get_cpu();
+ shadow = &per_cpu(shadow_time, cpu);
write_seqlock_irq(&xtime_lock);
@@ -317,9 +434,8 @@
* be stale, so we can retry with fresh ones.
*/
again:
- nsec = (s64)tv->tv_nsec -
- ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
- if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
+ nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
+ if (unlikely(!time_values_up_to_date(cpu))) {
__get_time_values_from_xen();
goto again;
}
@@ -335,7 +451,7 @@
*/
nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
- nsec -= (shadow_system_time - processed_system_time);
+ nsec -= (shadow->system_timestamp - processed_system_time);
__normalize_time(&sec, &nsec);
wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
@@ -349,23 +465,20 @@
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
- /* Reset all our running time counts. They make no sense now. */
- last_seen_tv.tv_sec = 0;
- last_update_from_xen = 0;
-
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
if (xen_start_info.flags & SIF_INITDOMAIN) {
dom0_op_t op;
- last_rtc_update = last_update_to_xen = 0;
op.cmd = DOM0_SETTIME;
op.u.settime.secs = xentime.tv_sec;
op.u.settime.usecs = xentime.tv_nsec / NSEC_PER_USEC;
- op.u.settime.system_time = shadow_system_time;
+ op.u.settime.system_time = shadow->system_timestamp;
write_sequnlock_irq(&xtime_lock);
HYPERVISOR_dom0_op(&op);
} else
#endif
write_sequnlock_irq(&xtime_lock);
+
+ put_cpu();
clock_was_set();
return 0;
@@ -403,9 +516,30 @@
*/
unsigned long long monotonic_clock(void)
{
- return cur_timer->monotonic_clock();
+ int cpu = get_cpu();
+ struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+ s64 off;
+ unsigned long flags;
+
+ for ( ; ; ) {
+ off = get_nsec_offset(shadow);
+ if (time_values_up_to_date(cpu))
+ break;
+ write_seqlock_irqsave(&xtime_lock, flags);
+ __get_time_values_from_xen();
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ }
+
+ put_cpu();
+
+ return shadow->system_timestamp + off;
}
EXPORT_SYMBOL(monotonic_clock);
+
+unsigned long long sched_clock(void)
+{
+ return monotonic_clock();
+}
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
@@ -427,27 +561,26 @@
static inline void do_timer_interrupt(int irq, void *dev_id,
struct pt_regs *regs)
{
- time_t wtm_sec, sec;
- s64 delta, delta_cpu, nsec;
- long sec_diff, wtm_nsec;
+ s64 delta, delta_cpu;
int cpu = smp_processor_id();
+ struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
do {
__get_time_values_from_xen();
- delta = delta_cpu = (s64)shadow_system_time +
- ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
+ delta = delta_cpu =
+ shadow->system_timestamp + get_nsec_offset(shadow);
delta -= processed_system_time;
delta_cpu -= per_cpu(processed_system_time, cpu);
}
- while (!TIME_VALUES_UP_TO_DATE);
+ while (!time_values_up_to_date(cpu));
if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
printk("Timer ISR/%d: Time went backwards: "
"delta=%lld cpu_delta=%lld shadow=%lld "
"off=%lld processed=%lld cpu_processed=%lld\n",
- cpu, delta, delta_cpu, shadow_system_time,
- ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC),
+ cpu, delta, delta_cpu, shadow->system_timestamp,
+ (s64)get_nsec_offset(shadow),
processed_system_time,
per_cpu(processed_system_time, cpu));
for (cpu = 0; cpu < num_online_cpus(); cpu++)
@@ -470,76 +603,6 @@
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING, regs);
}
-
- if (cpu != 0)
- return;
-
- /*
- * Take synchronised time from Xen once a minute if we're not
- * synchronised ourselves, and we haven't chosen to keep an independent
- * time base.
- */
- if (!INDEPENDENT_WALLCLOCK() &&
- ((time_status & STA_UNSYNC) != 0) &&
- (xtime.tv_sec > (last_update_from_xen + 60))) {
- /* Adjust shadow for jiffies that haven't updated xtime yet. */
- shadow_tv.tv_usec -=
- (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
- HANDLE_USEC_UNDERFLOW(shadow_tv);
-
- /*
- * Reset our running time counts if they are invalidated by
- * a warp backwards of more than 500ms.
- */
- sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
- if (unlikely(abs(sec_diff) > 1) ||
- unlikely(((sec_diff * USEC_PER_SEC) +
- (xtime.tv_nsec / NSEC_PER_USEC) -
- shadow_tv.tv_usec) > 500000)) {
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- last_rtc_update = last_update_to_xen = 0;
-#endif
- last_seen_tv.tv_sec = 0;
- }
-
- /* Update our unsynchronised xtime appropriately. */
- sec = shadow_tv.tv_sec;
- nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
-
- __normalize_time(&sec, &nsec);
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- last_update_from_xen = sec;
- }
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- if (!(xen_start_info.flags & SIF_INITDOMAIN))
- return;
-
- /* Send synchronised time to Xen approximately every minute. */
- if (((time_status & STA_UNSYNC) == 0) &&
- (xtime.tv_sec > (last_update_to_xen + 60))) {
- dom0_op_t op;
- struct timeval tv;
-
- tv.tv_sec = xtime.tv_sec;
- tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
- tv.tv_usec += (jiffies - wall_jiffies) * (USEC_PER_SEC/HZ);
- HANDLE_USEC_OVERFLOW(tv);
-
- op.cmd = DOM0_SETTIME;
- op.u.settime.secs = tv.tv_sec;
- op.u.settime.usecs = tv.tv_usec;
- op.u.settime.system_time = shadow_system_time;
- HYPERVISOR_dom0_op(&op);
-
- last_update_to_xen = xtime.tv_sec;
- }
-#endif
}
/*
@@ -731,12 +794,10 @@
xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- processed_system_time = shadow_system_time;
+ processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
per_cpu(processed_system_time, 0) = processed_system_time;
- if (timer_tsc_init.init(NULL) != 0)
- BUG();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
+ init_cpu_khz();
#if defined(__x86_64__)
vxtime.mode = VXTIME_TSC;
@@ -807,21 +868,15 @@
/* No locking required. We are only CPU running, and interrupts are off. */
void time_resume(void)
{
- if (timer_tsc_init.init(NULL) != 0)
- BUG();
+ init_cpu_khz();
/* Get timebases for new environment. */
__get_time_values_from_xen();
/* Reset our own concept of passage of system time. */
- processed_system_time = shadow_system_time;
+ processed_system_time =
+ per_cpu(shadow_time, smp_processor_id()).system_timestamp;
per_cpu(processed_system_time, 0) = processed_system_time;
-
- /* Accept a warp in UTC (wall-clock) time. */
- last_seen_tv.tv_sec = 0;
-
- /* Make sure we resync UTC time with Xen on next timer interrupt. */
- last_update_from_xen = 0;
}
#ifdef CONFIG_SMP
@@ -832,7 +887,8 @@
do {
seq = read_seqbegin(&xtime_lock);
- per_cpu(processed_system_time, cpu) = shadow_system_time;
+ per_cpu(processed_system_time, cpu) =
+ per_cpu(shadow_time, cpu).system_timestamp;
} while (read_seqretry(&xtime_lock, seq));
per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
@@ -861,3 +917,13 @@
return 0;
}
__initcall(xen_sysctl_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Fri Jul 29 20:25:03 2005
@@ -90,7 +90,9 @@
static int kstack_depth_to_print = 24;
struct notifier_block *i386die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+DEFINE_SPINLOCK(die_notifier_lock);
+EXPORT_SYMBOL(die_notifier_lock);
+EXPORT_SYMBOL(i386die_chain);
int register_die_notifier(struct notifier_block *nb)
{
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c	Fri Jul 29 20:25:03 2005
@@ -296,7 +296,7 @@
/* 2. Get a new contiguous memory extent. */
BUG_ON(HYPERVISOR_dom_mem_op(
- MEMOP_increase_reservation, &mfn, 1, order) != 1);
+ MEMOP_increase_reservation, &mfn, 1, order | (32<<8)) != 1);
/* 3. Map the new extent in place of old pages. */
for (i = 0; i < (1<<order); i++) {
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c Fri Jul 29 20:25:03 2005
@@ -20,8 +20,6 @@
#include <linux/acpi.h>
#include "pci.h"
-
-#define DBG printk
#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
#define PIRQ_VERSION 0x0100
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Fri Jul 29 20:25:03 2005
@@ -166,8 +166,14 @@
u16 flags;
flags = shared[ref].flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ /*
+ * But can't flags == (GTF_accept_transfer | GTF_transfer_completed)
+ * if gnttab_donate executes without interruption???
+ */
+#else
ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
+#endif
/*
* If a transfer is committed then wait for the frame address to appear.
* Otherwise invalidate the grant entry against future use.
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Jul 29 20:25:03 2005
@@ -172,8 +172,8 @@
{
static char *envp[] = { "HOME=/", "TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
- static char *restart_argv[] = { "/sbin/shutdown", "-r", "now", NULL };
- static char *poweroff_argv[] = { "/sbin/halt", "-p", NULL };
+ static char *restart_argv[] = { "/sbin/reboot", NULL };
+ static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
extern asmlinkage long sys_reboot(int magic1, int magic2,
unsigned int cmd, void *arg);
@@ -187,7 +187,7 @@
switch ( shutting_down )
{
case CMSG_SHUTDOWN_POWEROFF:
- if ( execve("/sbin/halt", poweroff_argv, envp) < 0 )
+ if ( execve("/sbin/poweroff", poweroff_argv, envp) < 0 )
{
sys_reboot(LINUX_REBOOT_MAGIC1,
LINUX_REBOOT_MAGIC2,
@@ -197,7 +197,7 @@
break;
case CMSG_SHUTDOWN_REBOOT:
- if ( execve("/sbin/shutdown", restart_argv, envp) < 0 )
+ if ( execve("/sbin/reboot", restart_argv, envp) < 0 )
{
sys_reboot(LINUX_REBOOT_MAGIC1,
LINUX_REBOOT_MAGIC2,
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Fri Jul 29 20:25:03 2005
@@ -15,7 +15,7 @@
ptrace.o quirks.o syscall.o bootflag.o
i386-obj-y := time.o
-obj-y += ../../i386/kernel/timers/
+#obj-y += ../../i386/kernel/timers/
s-obj-y :=
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Fri Jul 29 20:25:03 2005
@@ -145,7 +145,7 @@
__pte(0), 0);
}
- mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+ mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
BUG();
#endif
@@ -550,7 +550,7 @@
0, blkif->domid);
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
if ( blkif->is_blktap )
- mcl[i].args[3] = ID_TO_DOM(req->id);
+ mcl[i].args[MULTI_UVMDOMID_INDEX] = ID_TO_DOM(req->id);
#endif
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT);
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Jul 29 20:25:03 2005
@@ -1428,8 +1428,9 @@
int i;
#ifdef CONFIG_XEN_BLKDEV_GRANT
- if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS,
- &gref_head, &gref_terminal ))
+ /* A grant for every ring slot, plus one for the ring itself. */
+ if ( 0 > gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1,
+ &gref_head, &gref_terminal) )
return 1;
printk(KERN_ALERT "Blkif frontend is using grant tables.\n");
#endif
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Jul 29 20:25:03 2005
@@ -50,6 +50,9 @@
/* Private indexes into shared ring. */
NETIF_RING_IDX rx_req_cons;
NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
+#endif
NETIF_RING_IDX tx_req_cons;
NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Fri Jul 29 20:25:03 2005
@@ -18,6 +18,24 @@
#include <linux/delay.h>
#endif
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, unsigned char *p)
+{
+ int i;
+
+ printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
+ for (i = 0; i < 20; i++) {
+ printk("%02x", p[i]);
+ }
+ printk("\n");
+}
+#endif
+#endif
+
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
@@ -41,7 +59,9 @@
static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
+#endif
static unsigned char rx_notify[NR_EVENT_CHANNELS];
/* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -68,7 +88,20 @@
static PEND_RING_IDX dealloc_prod, dealloc_cons;
static struct sk_buff_head tx_queue;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static u16 grant_tx_ref[MAX_PENDING_REQS];
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
+#endif
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;
@@ -91,6 +124,7 @@
return mfn;
}
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static void free_mfn(unsigned long mfn)
{
unsigned long flags;
@@ -102,6 +136,7 @@
BUG();
spin_unlock_irqrestore(&mfn_lock, flags);
}
+#endif
static inline void maybe_schedule_tx_action(void)
{
@@ -160,7 +195,17 @@
dev_kfree_skb(skb);
skb = nskb;
}
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef DEBUG_GRANT
+    printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
+ netif->rx->req_prod,
+ netif->rx_req_cons,
+           netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
+           netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
+#endif
+#endif
netif->rx_req_cons++;
netif_get(netif);
@@ -201,7 +246,11 @@
u16 size, id, evtchn;
multicall_entry_t *mcl;
mmu_update_t *mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_donate_t *gop;
+#else
struct mmuext_op *mmuext;
+#endif
unsigned long vdata, mdata, new_mfn;
struct sk_buff_head rxq;
struct sk_buff *skb;
@@ -212,7 +261,12 @@
mcl = rx_mcl;
mmu = rx_mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
+
while ( (skb = skb_dequeue(&rx_queue)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -228,7 +282,6 @@
skb_queue_head(&rx_queue, skb);
break;
}
-
/*
* Set the new P2M table entry before reassigning the old data page.
* Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -239,6 +292,14 @@
pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop->mfn = mdata >> PAGE_SHIFT;
+ gop->domid = netif->domid;
+ gop->handle = netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+ netif->rx_resp_prod_copy++;
+ gop++;
+#else
mcl->op = __HYPERVISOR_mmuext_op;
mcl->args[0] = (unsigned long)mmuext;
mcl->args[1] = 1;
@@ -249,13 +310,16 @@
mmuext->cmd = MMUEXT_REASSIGN_PAGE;
mmuext->mfn = mdata >> PAGE_SHIFT;
mmuext++;
-
+#endif
mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
mmu->val = __pa(vdata) >> PAGE_SHIFT;
mmu++;
__skb_queue_tail(&rxq, skb);
+#ifdef DEBUG_GRANT
+ dump_packet('a', mdata, vdata);
+#endif
/* Filled the batch queue? */
if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
break;
@@ -271,12 +335,24 @@
mcl->args[3] = DOMID_SELF;
mcl++;
- mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+#else
+ mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+#endif
if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
BUG();
mcl = rx_mcl;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
+ grant_rx_op, gop - grant_rx_op))) {
+ BUG();
+ }
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
while ( (skb = __skb_dequeue(&rxq)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -284,9 +360,12 @@
/* Rederive the machine addresses. */
new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mdata = (unsigned long)skb->data & ~PAGE_MASK;
+#else
mdata = ((mmuext[0].mfn << PAGE_SHIFT) |
((unsigned long)skb->data & ~PAGE_MASK));
-
+#endif
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
@@ -299,13 +378,16 @@
/* Check the reassignment error code. */
status = NETIF_RSP_OKAY;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ BUG_ON(gop->status != 0);
+#else
if ( unlikely(mcl[1].result != 0) )
{
DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
free_mfn(mdata >> PAGE_SHIFT);
status = NETIF_RSP_ERROR;
}
-
+#endif
evtchn = netif->evtchn;
id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
if ( make_rx_response(netif, id, status, mdata,
@@ -318,9 +400,13 @@
netif_put(netif);
dev_kfree_skb(skb);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl++;
+ gop++;
+#else
mcl += 2;
mmuext += 1;
+#endif
}
while ( notify_nr != 0 )
@@ -404,21 +490,39 @@
netif_schedule_work(netif);
}
-static void net_tx_action(unsigned long unused)
-{
- struct list_head *ent;
- struct sk_buff *skb;
+inline static void net_tx_action_dealloc(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_unmap_grant_ref_t unmap_ops[MAX_PENDING_REQS];
+ gnttab_unmap_grant_ref_t *gop;
+#else
+ multicall_entry_t *mcl;
+#endif
+ u16 pending_idx;
+ PEND_RING_IDX dc, dp;
netif_t *netif;
- netif_tx_request_t txreq;
- u16 pending_idx;
- NETIF_RING_IDX i;
- multicall_entry_t *mcl;
- PEND_RING_IDX dc, dp;
- unsigned int data_len;
-
- if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
- goto skip_dealloc;
-
+
+ dc = dealloc_cons;
+ dp = dealloc_prod;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ /*
+ * Free up any grants we have finished using
+ */
+ gop = unmap_ops;
+ while (dc != dp) {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+ gop->host_virt_addr = MMAP_VADDR(pending_idx);
+ gop->dev_bus_addr = 0;
+ gop->handle = grant_tx_ref[pending_idx];
+ grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+ gop++;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ unmap_ops, gop - unmap_ops))) {
+ BUG();
+ }
+#else
mcl = tx_mcl;
while ( dc != dp )
{
@@ -428,15 +532,18 @@
mcl++;
}
- mcl[-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
BUG();
mcl = tx_mcl;
+#endif
while ( dealloc_cons != dp )
{
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
/* The update_va_mapping() must not fail. */
BUG_ON(mcl[0].result != 0);
+#endif
pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
@@ -460,11 +567,38 @@
netif_put(netif);
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
mcl++;
- }
-
- skip_dealloc:
+#endif
+ }
+
+}
+
+/* Called after netfront has transmitted */
+static void net_tx_action(unsigned long unused)
+{
+ struct list_head *ent;
+ struct sk_buff *skb;
+ netif_t *netif;
+ netif_tx_request_t txreq;
+ u16 pending_idx;
+ NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_map_grant_ref_t map_ops[MAX_PENDING_REQS];
+ gnttab_map_grant_ref_t *mop;
+#else
+ multicall_entry_t *mcl;
+#endif
+ unsigned int data_len;
+
+ if ( dealloc_cons != dealloc_prod )
+ net_tx_action_dealloc();
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ mop = map_ops;
+#else
mcl = tx_mcl;
+#endif
while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
!list_empty(&net_schedule_list) )
{
@@ -486,7 +620,6 @@
rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
sizeof(txreq));
-
/* Credit-based scheduling. */
if ( txreq.size > netif->remaining_credit )
{
@@ -566,12 +699,20 @@
/* Packets passed to netif_rx() must have some headroom. */
skb_reserve(skb, 16);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ mop->host_virt_addr = MMAP_VADDR(pending_idx);
+ mop->dom = netif->domid;
+ mop->ref = txreq.addr >> PAGE_SHIFT;
+ mop->flags = GNTMAP_host_map | GNTMAP_readonly;
+ mop++;
+#else
MULTI_update_va_mapping_otherdomain(
mcl, MMAP_VADDR(pending_idx),
pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL),
0, netif->domid);
+
mcl++;
+#endif
memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
pending_tx_info[pending_idx].netif = netif;
@@ -581,11 +722,26 @@
pending_cons++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if ((mop - map_ops) >= ARRAY_SIZE(map_ops))
+ break;
+#else
/* Filled the batch queue? */
if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
break;
- }
-
+#endif
+ }
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (mop == map_ops) {
+ return;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ map_ops, mop - map_ops))) {
+ BUG();
+ }
+ mop = map_ops;
+#else
if ( mcl == tx_mcl )
return;
@@ -593,6 +749,7 @@
BUG();
mcl = tx_mcl;
+#endif
while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
{
pending_idx = *((u16 *)skb->data);
@@ -600,6 +757,20 @@
memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
/* Check the remap error code. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (unlikely(mop->dev_bus_addr == 0)) {
+ printk(KERN_ALERT "#### netback grant fails\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ kfree_skb(skb);
+ mop++;
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ continue;
+ }
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(mop->dev_bus_addr);
+ grant_tx_ref[pending_idx] = mop->handle;
+#else
if ( unlikely(mcl[0].result != 0) )
{
DPRINTK("Bad page frame\n");
@@ -613,6 +784,7 @@
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
+#endif
data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
@@ -620,7 +792,6 @@
memcpy(skb->data,
(void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
data_len);
-
if ( data_len < txreq.size )
{
/* Append the packet payload as a fragment. */
@@ -654,7 +825,11 @@
netif_rx(skb);
netif->dev->last_rx = jiffies;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ mop++;
+#else
mcl++;
+#endif
}
}
@@ -774,6 +949,12 @@
return 0;
printk("Initialising Xen netif backend\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ printk("#### netback tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ printk("#### netback rx using grant tables\n");
+#endif
/* We can increase reservation by this much in net_rx_action(). */
balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri Jul 29 20:25:03 2005
@@ -54,6 +54,25 @@
#include <asm/page.h>
#include <asm/uaccess.h>
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, u32 ap)
+{
+ unsigned char *p = (unsigned char *)ap;
+ int i;
+
+ printk(KERN_ALERT "#### rx_poll %c %08x ", tag & 0xff, addr);
+ for (i = 0; i < 20; i++) {
+ printk("%02x", p[i]);
+ }
+ printk("\n");
+}
+#endif
+#endif
+
#ifndef __GFP_NOWARN
#define __GFP_NOWARN 0
#endif
@@ -82,6 +101,21 @@
#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static domid_t rdomid = 0;
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
+
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);
@@ -322,6 +356,14 @@
for (i = np->tx_resp_cons; i != prod; i++) {
id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
skb = np->tx_skbs[id];
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
+ printk(KERN_ALERT "netfront: query foreign access\n");
+ }
+ gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
+ grant_tx_ref[id] = GRANT_INVALID_REF;
+#endif
ADD_ID_TO_FREELIST(np->tx_skbs, id);
dev_kfree_skb_irq(skb);
}
@@ -356,6 +398,9 @@
struct sk_buff *skb;
int i, batch_target;
NETIF_RING_IDX req_prod = np->rx->req_prod;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ int ref;
+#endif
if (unlikely(np->backend_state != BEST_CONNECTED))
return;
@@ -388,7 +433,16 @@
np->rx_skbs[id] = skb;
np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        if ((ref = gnttab_claim_grant_reference(&gref_rx_head, gref_rx_terminal)) < 0) {
+ printk(KERN_ALERT "#### netfront can't claim rx reference\n");
+ BUG();
+ }
+ grant_rx_ref[id] = ref;
+ gnttab_grant_foreign_transfer_ref(ref, rdomid,
+ virt_to_machine(skb->head) >> PAGE_SHIFT);
+ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+#endif
rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
/* Remove this page from pseudo phys map before passing back to Xen. */
@@ -400,7 +454,7 @@
}
/* After all PTEs have been zapped we blow away stale TLB entries. */
- rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+ rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
/* Give away a batch of pages. */
rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
@@ -436,6 +490,10 @@
struct net_private *np = netdev_priv(dev);
netif_tx_request_t *tx;
NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ unsigned int ref;
+ unsigned long mfn;
+#endif
if (unlikely(np->tx_full)) {
printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
@@ -470,7 +528,18 @@
tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
tx->id = id;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) < 0) {
+ printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
+ BUG();
+ }
+ mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+ gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
+ tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+ grant_tx_ref[id] = ref;
+#else
tx->addr = virt_to_machine(skb->data);
+#endif
tx->size = skb->len;
tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
@@ -530,6 +599,10 @@
int work_done, budget, more_to_do = 1;
struct sk_buff_head rxq;
unsigned long flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ unsigned long mfn;
+ grant_ref_t ref;
+#endif
spin_lock(&np->rx_lock);
@@ -542,7 +615,6 @@
if ((budget = *pbudget) > dev->quota)
budget = dev->quota;
-
rp = np->rx->resp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
@@ -550,7 +622,6 @@
(i != rp) && (work_done < budget);
i++, work_done++) {
rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-
/*
* An error here is very odd. Usually indicates a backend bug,
* low-memory condition, or that we didn't have reservation headroom.
@@ -565,11 +636,23 @@
continue;
}
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ ref = grant_rx_ref[rx->id];
+ grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+
+ mfn = gnttab_end_foreign_transfer(ref);
+ gnttab_release_grant_reference(&gref_rx_head, ref);
+#endif
+
skb = np->rx_skbs[rx->id];
ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
/* NB. We handle skb overflow later. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ skb->data = skb->head + rx->addr;
+#else
skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+#endif
skb->len = rx->status;
skb->tail = skb->data + skb->len;
@@ -580,16 +663,32 @@
np->stats.rx_bytes += rx->status;
/* Remap the page. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#else
mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+#endif
mmu->val = __pa(skb->head) >> PAGE_SHIFT;
mmu++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
+ pfn_pte_ma(mfn, PAGE_KERNEL), 0);
+#else
MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
pfn_pte_ma(rx->addr >> PAGE_SHIFT,
PAGE_KERNEL), 0);
+#endif
mcl++;
phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mfn;
+#else
rx->addr >> PAGE_SHIFT;
-
+#endif
+#ifdef GRANT_DEBUG
+        printk(KERN_ALERT "#### rx_poll enqueue vdata=%08x mfn=%08x ref=%04x\n",
+ skb->data, mfn, ref);
+#endif
__skb_queue_tail(&rxq, skb);
}
@@ -608,6 +707,11 @@
}
while ((skb = __skb_dequeue(&rxq)) != NULL) {
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### rx_poll dequeue vdata=%08x mfn=%08x\n",
+ skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+ dump_packet('d', skb->data, (unsigned long)skb->data);
+#endif
/*
* Enough room in skbuff for the data we were passed? Also, Linux
* expects at least 16 bytes headroom in each receive buffer.
@@ -615,6 +719,7 @@
if (unlikely(skb->tail > skb->end) ||
unlikely((skb->data - skb->head) < 16)) {
nskb = NULL;
+
/* Only copy the packet if it fits in the current MTU. */
if (skb->len <= (dev->mtu + ETH_HLEN)) {
@@ -646,7 +751,6 @@
/* Set the shared-info area, which is hidden behind the real data. */
init_skb_shinfo(skb);
-
/* Ethernet-specific work. Delayed to here as it peeks the header. */
skb->protocol = eth_type_trans(skb, dev);
@@ -919,6 +1023,9 @@
network_connect(dev, status);
np->evtchn = status->evtchn;
np->irq = bind_evtchn_to_irq(np->evtchn);
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+ rdomid = status->domid;
+#endif
(void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
netctrl_connected_count();
(void)send_fake_arp(dev);
@@ -962,10 +1069,18 @@
np->rx_max_target = RX_MAX_TARGET;
/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
- for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
+ for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
np->tx_skbs[i] = (void *)((unsigned long) i+1);
- for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ grant_tx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
+ for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
np->rx_skbs[i] = (void *)((unsigned long) i+1);
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_rx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
dev->open = network_open;
dev->hard_start_xmit = network_start_xmit;
@@ -1267,6 +1382,22 @@
if (xen_start_info.flags & SIF_INITDOMAIN)
return 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+ &gref_tx_head, &gref_tx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+ &gref_rx_head, &gref_rx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront rx using grant tables\n");
+#endif
if ((err = xennet_proc_init()) != 0)
return err;
@@ -1284,6 +1415,16 @@
DPRINTK("< err=%d\n", err);
return err;
+}
+
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
+#endif
}
static void vif_suspend(struct net_private *np)
@@ -1478,3 +1619,4 @@
#endif
module_init(netif_init);
+module_exit(netif_exit);
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Fri Jul 29 20:25:03 2005
@@ -200,27 +200,29 @@
case IOCTL_PRIVCMD_INITDOMAIN_STORE:
{
extern int do_xenbus_probe(void*);
+ unsigned long page;
if (xen_start_info.store_evtchn != 0) {
- ret = -EINVAL;
+ ret = xen_start_info.store_mfn;
break;
}
/* Allocate page. */
- xen_start_info.store_page = get_zeroed_page(GFP_KERNEL);
- if (!xen_start_info.store_page) {
+ page = get_zeroed_page(GFP_KERNEL);
+ if (!page) {
ret = -ENOMEM;
break;
}
/* We don't refcnt properly, so set reserved on page.
* (this allocation is permanent) */
- SetPageReserved(virt_to_page(xen_start_info.store_page));
+ SetPageReserved(virt_to_page(page));
/* Initial connect. Setup channel and page. */
xen_start_info.store_evtchn = data;
- ret = pfn_to_mfn(virt_to_phys((void *)xen_start_info.store_page) >>
- PAGE_SHIFT);
+ xen_start_info.store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >>
+ PAGE_SHIFT);
+ ret = xen_start_info.store_mfn;
/* We'll return; then this will wait for the daemon to answer */
kthread_run(do_xenbus_probe, NULL, "xenbus_probe");
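
The value returned to the caller above is the machine frame number of the newly allocated store page; the pseudo-physical frame is translated before it leaves the kernel. A small sketch to make the arithmetic explicit (the helper name is invented for illustration):

    /* Illustrative only: from a kernel virtual address to the mfn handed to
     * the tools.  xenbus_comms.c below reverses this with
     * machine_to_virt(store_mfn << PAGE_SHIFT). */
    static unsigned long store_mfn_of(void *page)
    {
        unsigned long pfn = virt_to_phys(page) >> PAGE_SHIFT;  /* pseudo-physical frame */
        return pfn_to_mfn(pfn);                                /* machine frame seen by Xen */
    }
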
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Fri Jul 29 20:25:03 2005
@@ -193,7 +193,7 @@
__pte(0), 0);
}
- mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+ mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
BUG();
}
@@ -651,7 +651,7 @@
{
MULTI_update_va_mapping_otherdomain(
mcl+i, MMAP_VADDR(pending_idx, i),
- pfn_pte_ma(buffer_mach >> PAGE_SHIFT, remap_prot),
+ pfn_pte_ma((buffer_mach + offset) >> PAGE_SHIFT, remap_prot),
0, up->domid);
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Fri Jul 29 20:25:03 2005
@@ -47,6 +47,17 @@
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
+static inline struct ringbuf_head *outbuf(void)
+{
+ return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT);
+}
+
+static inline struct ringbuf_head *inbuf(void)
+{
+ return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT)
+ + PAGE_SIZE/2;
+}
+
static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
{
wake_up(&xb_waitq);
@@ -108,9 +119,10 @@
return avail != 0;
}
-int xb_write(struct ringbuf_head *out, const void *data, unsigned len)
+int xb_write(const void *data, unsigned len)
{
struct ringbuf_head h;
+ struct ringbuf_head *out = outbuf();
do {
void *dst;
@@ -141,24 +153,26 @@
return 0;
}
-int xs_input_avail(struct ringbuf_head *in)
+int xs_input_avail(void)
{
unsigned int avail;
+ struct ringbuf_head *in = inbuf();
get_input_chunk(in, in->buf, &avail);
return avail != 0;
}
-int xb_read(struct ringbuf_head *in, void *data, unsigned len)
+int xb_read(void *data, unsigned len)
{
struct ringbuf_head h;
+ struct ringbuf_head *in = inbuf();
int was_full;
while (len != 0) {
unsigned int avail;
const char *src;
- wait_event(xb_waitq, xs_input_avail(in));
+ wait_event(xb_waitq, xs_input_avail());
h = *in;
mb();
if (!check_buffer(&h)) {
@@ -182,14 +196,14 @@
}
/* If we left something, wake watch thread to deal with it. */
- if (xs_input_avail(in))
+ if (xs_input_avail())
wake_up(&xb_waitq);
return 0;
}
/* Set up interrupt handler off store event channel. */
-int xb_init_comms(void **in, void **out)
+int xb_init_comms(void)
{
int err, irq;
@@ -202,7 +216,9 @@
return err;
}
- *out = (void *)xen_start_info.store_page;
- *in = (void *)xen_start_info.store_page + PAGE_SIZE/2;
+ /* FIXME zero out page -- domain builder should probably do this*/
+ memset(machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT),
+ 0, PAGE_SIZE);
+
return 0;
}
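
With store_page gone from start_info, both ring buffers are now derived from store_mfn: the first half of the shared page carries kernel-to-xenstored traffic and the second half the replies. A rough sketch of the layout assumed by outbuf()/inbuf() above (PAGE_SIZE of 4096 is only the usual example):

    /*   base = machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT)
     *
     *   [ base      .. base+2047 ]  ringbuf_head + data, kernel -> xenstored (outbuf)
     *   [ base+2048 .. base+4095 ]  ringbuf_head + data, xenstored -> kernel (inbuf)
     */
    static void *store_ring(int incoming)   /* hypothetical helper, for illustration */
    {
        char *base = (char *)machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT);
        return incoming ? base + PAGE_SIZE / 2 : base;
    }
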
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Fri Jul 29 20:25:03 2005
@@ -2,13 +2,12 @@
#ifndef _XENBUS_COMMS_H
#define _XENBUS_COMMS_H
int xs_init(void);
-int xb_init_comms(void **in, void **out);
+int xb_init_comms(void);
/* Low level routines. */
-struct ringbuf_head;
-int xb_write(struct ringbuf_head *out, const void *data, unsigned len);
-int xb_read(struct ringbuf_head *in, void *data, unsigned len);
-int xs_input_avail(struct ringbuf_head *in);
+int xb_write(const void *data, unsigned len);
+int xb_read(void *data, unsigned len);
+int xs_input_avail(void);
extern wait_queue_head_t xb_waitq;
#endif /* _XENBUS_COMMS_H */
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Jul 29 20:25:03 2005
@@ -1,6 +1,5 @@
/******************************************************************************
* Talks to Xen Store to figure out what devices we have.
- * Currently experiment code, but when I grow up I'll be a bus driver!
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
* Copyright (C) 2005 Mike Wray, Hewlett-Packard
@@ -26,6 +25,8 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
+#define DEBUG
+
#include <asm-xen/hypervisor.h>
#include <asm-xen/xenbus.h>
#include <linux/kernel.h>
@@ -36,151 +37,7 @@
#include <stdarg.h>
#include "xenbus_comms.h"
-/* Name of field containing device type. */
-#define XENBUS_DEVICE_TYPE "type"
-
-#define DEBUG
-
-#ifdef DEBUG
-#define dprintf(_fmt, _args...) \
-printk(KERN_INFO __stringify(KBUILD_MODNAME) " [DBG] %s" _fmt, __FUNCTION__, ##_args)
-#else
-#define dprintf(_fmt, _args...) do { } while(0)
-#endif
-
-static int xs_init_done = 0;
-
-/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
-int xenbus_gather(const char *dir, ...)
-{
- va_list ap;
- const char *name;
- int ret = 0;
-
- va_start(ap, dir);
- while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
- const char *fmt = va_arg(ap, char *);
- void *result = va_arg(ap, void *);
- char *p;
-
- p = xenbus_read(dir, name, NULL);
- if (IS_ERR(p)) {
- ret = PTR_ERR(p);
- break;
- }
- if (sscanf(p, fmt, result) == 0)
- ret = -EINVAL;
- kfree(p);
- }
- va_end(ap);
- return ret;
-}
-
-/* Return the path to dir with /name appended.
- * If name is null or empty returns a copy of dir.
- */
-char *xenbus_path(const char *dir, const char *name)
-{
- char *ret;
- int len;
-
- len = strlen(dir) + 1;
- if (name)
- len += strlen(name) + 1;
- ret = kmalloc(len, GFP_KERNEL);
- if (ret == NULL)
- return NULL;
- strcpy(ret, dir);
- if (name) {
- strcat(ret, "/");
- strcat(ret, name);
- }
- return ret;
-}
-
#define streq(a, b) (strcmp((a), (b)) == 0)
-
-char *xenbus_read(const char *dir, const char *name, unsigned int *data_n)
-{
- int err = 0;
- char *data = NULL;
- char *path = xenbus_path(dir, name);
- int n = 0;
-
- if (!path) {
- err = -ENOMEM;
- goto out;
- }
- data = xs_read(path, &n);
- if (IS_ERR(data)) {
- err = PTR_ERR(data);
- if (err == -EISDIR)
- err = -ENOENT;
- } else if (n == 0) {
- err = -ENOENT;
- kfree(data);
- }
- kfree(path);
- out:
- if (data_n)
- *data_n = n;
- return (err ? ERR_PTR(err) : data);
-}
-
-int xenbus_write(const char *dir, const char *name, const char *data, int data_n)
-{
- int err = 0;
- char *path = xenbus_path(dir, name);
-
- if (!path)
- return -ENOMEM;
- err = xs_write(path, data, data_n, O_CREAT);
- kfree(path);
- return err;
-}
-
-int xenbus_read_string(const char *dir, const char *name, char **val)
-{
- int err = 0;
-
- *val = xenbus_read(dir, name, NULL);
- if (IS_ERR(*val)) {
- err = PTR_ERR(*val);
- *val = NULL;
- }
- return err;
-}
-
-int xenbus_write_string(const char *dir, const char *name, const char *val)
-{
- return xenbus_write(dir, name, val, strlen(val));
-}
-
-int xenbus_read_ulong(const char *dir, const char *name, unsigned long *val)
-{
- return xenbus_gather(dir, name, "%lu", val, NULL);
-}
-
-int xenbus_write_ulong(const char *dir, const char *name, unsigned long val)
-{
- char data[32] = {};
-
- snprintf(data, sizeof(data), "%lu", val);
- return xenbus_write(dir, name, data, strlen(data));
-}
-
-int xenbus_read_long(const char *dir, const char *name, long *val)
-{
- return xenbus_gather(dir, name, "%li", val, NULL);
-}
-
-int xenbus_write_long(const char *dir, const char *name, long val)
-{
- char data[32] = {};
-
- snprintf(data, sizeof(data), "%li", val);
- return xenbus_write(dir, name, data, strlen(data));
-}
/* If something in array of ids matches this device, return it. */
static const struct xenbus_device_id *
@@ -190,10 +47,13 @@
if (!streq(arr->devicetype, dev->devicetype))
continue;
- if (streq(arr->subtype, "") ||
- streq(arr->subtype, dev->subtype)) {
+ /* If they don't care what subtype, it's a match. */
+ if (streq(arr->subtype, ""))
return arr;
- }
+
+ /* If they care, device must have (same) subtype. */
+ if (dev->subtype && streq(arr->subtype, dev->subtype))
+ return arr;
}
return NULL;
}
@@ -214,86 +74,19 @@
.match = xenbus_match,
};
-
-/* Bus type for backend drivers. */
-static struct bus_type xenback_type = {
- .name = "xenback",
- .match = xenbus_match,
-};
-
-struct xenbus_for_dev {
- int (*fn)(struct xenbus_device *, void *);
- void *data;
-};
-
-static int for_dev(struct device *_dev, void *_data)
-{
- struct xenbus_device *dev = to_xenbus_device(_dev);
- struct xenbus_for_dev *data = _data;
- dev = to_xenbus_device(_dev);
- return data->fn(dev, data->data);
-}
-
-int xenbus_for_each_dev(struct xenbus_device * start, void * data,
- int (*fn)(struct xenbus_device *, void *))
-{
- struct xenbus_for_dev for_data = {
- .fn = fn,
- .data = data,
- };
- if (!fn)
- return -EINVAL;
- printk("%s> data=%p fn=%p for_data=%p\n", __FUNCTION__,
- data, fn, &for_data);
- return bus_for_each_dev(&xenbus_type,
- (start ? &start->dev : NULL),
- &for_data, for_dev);
-}
-
-struct xenbus_for_drv {
- int (*fn)(struct xenbus_driver *, void *);
- void *data;
-};
-
-static int for_drv(struct device_driver *_drv, void *_data)
-{
- struct xenbus_driver *drv = to_xenbus_driver(_drv);
- struct xenbus_for_drv *data = _data;
- return data->fn(drv, data->data);
-}
-
-int xenbus_for_each_drv(struct xenbus_driver * start, void * data,
- int (*fn)(struct xenbus_driver *, void *))
-{
- struct xenbus_for_drv for_data = {
- .fn = fn,
- .data = data,
- };
- if (!fn)
- return -EINVAL;
- return bus_for_each_drv(&xenbus_type,
- (start ? &start->driver: NULL),
- &for_data, for_drv);
-}
-
static int xenbus_dev_probe(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
const struct xenbus_device_id *id;
- printk("Probing device '%s'\n", _dev->bus_id);
- if (!drv->probe) {
- printk("'%s' no probefn\n", _dev->bus_id);
+ if (!drv->probe)
return -ENODEV;
- }
id = match_device(drv->ids, dev);
- if (!id) {
- printk("'%s' no id match\n", _dev->bus_id);
+ if (!id)
return -ENODEV;
- }
- printk("probing '%s' fn %p\n", _dev->bus_id, drv->probe);
+
return drv->probe(dev, id);
}
@@ -309,76 +102,99 @@
int xenbus_register_driver(struct xenbus_driver *drv)
{
- int err = 0;
-
- printk("%s> frontend driver %p %s\n", __FUNCTION__,
- drv, drv->name);
+ int err;
+
drv->driver.name = drv->name;
drv->driver.bus = &xenbus_type;
drv->driver.owner = drv->owner;
drv->driver.probe = xenbus_dev_probe;
drv->driver.remove = xenbus_dev_remove;
+ down(&xenbus_lock);
err = driver_register(&drv->driver);
- if (err == 0 && xs_init_done && drv->connect) {
- printk("%s> connecting driver %p %s\n", __FUNCTION__,
- drv, drv->name);
- drv->connect(drv);
- }
+ up(&xenbus_lock);
return err;
}
void xenbus_unregister_driver(struct xenbus_driver *drv)
{
+ down(&xenbus_lock);
driver_unregister(&drv->driver);
-}
-
-static int xenbus_probe_device(const char *dir, const char *name, const char *devicetype)
+ up(&xenbus_lock);
+}
+
+struct xb_find_info
+{
+ struct xenbus_device *dev;
+ const char *busid;
+};
+
+static int cmp_dev(struct device *dev, void *data)
+{
+ struct xb_find_info *info = data;
+
+ if (streq(dev->bus_id, info->busid)) {
+ info->dev = container_of(get_device(dev),
+ struct xenbus_device, dev);
+ return 1;
+ }
+ return 0;
+}
+
+/* FIXME: device_find is fixed in 2.6.13-rc2 according to Greg KH --RR */
+struct xenbus_device *xenbus_device_find(const char *busid)
+{
+ struct xb_find_info info = { .dev = NULL, .busid = busid };
+
+ bus_for_each_dev(&xenbus_type, NULL, &info, cmp_dev);
+ return info.dev;
+}
+
+
+static void xenbus_release_device(struct device *dev)
+{
+ if (dev) {
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+
+ kfree(xendev->subtype);
+ kfree(xendev);
+ }
+}
+/* devices/<typename>/<name> */
+static int xenbus_probe_device(const char *dirpath, const char *devicetype,
+ const char *name)
{
int err;
struct xenbus_device *xendev;
- unsigned int xendev_n;
- char *nodename;
-
- dprintf("> dir=%s name=%s\n", dir, name);
- nodename = xenbus_path(dir, name);
- if (!nodename)
+ unsigned int stringlen;
+
+ /* Nodename: /device/<typename>/<name>/ */
+ stringlen = strlen(dirpath) + strlen(devicetype) + strlen(name) + 3;
+ /* Typename */
+ stringlen += strlen(devicetype) + 1;
+ xendev = kmalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
+ if (!xendev)
return -ENOMEM;
-
- /* FIXME: This could be a rescan. Don't re-register existing devices. */
-
- /* Add space for the strings. */
- xendev_n = sizeof(*xendev) + strlen(nodename) + strlen(devicetype) + 2;
- xendev = kmalloc(xendev_n, GFP_KERNEL);
- if (!xendev) {
- err = -ENOMEM;
- goto free_nodename;
- }
- memset(xendev, 0, xendev_n);
-
+ memset(xendev, 0, sizeof(*xendev));
+
+ /* Copy the strings into the extra space. */
+ xendev->nodename = (char *)(xendev + 1);
+ sprintf(xendev->nodename, "%s/%s/%s", dirpath, devicetype, name);
+ xendev->devicetype = xendev->nodename + strlen(xendev->nodename) + 1;
+ strcpy(xendev->devicetype, devicetype);
+
+ /* FIXME: look for "subtype" field. */
snprintf(xendev->dev.bus_id, BUS_ID_SIZE, "%s-%s", devicetype, name);
xendev->dev.bus = &xenbus_type;
-
- xendev->id = simple_strtol(name, NULL, 0);
-
- /* Copy the strings into the extra space. */
- xendev->nodename = (char *)(xendev + 1);
- strcpy(xendev->nodename, nodename);
- xendev->devicetype = xendev->nodename + strlen(xendev->nodename) + 1;
- strcpy(xendev->devicetype, devicetype);
+ xendev->dev.release = xenbus_release_device;
/* Register with generic device framework. */
- printk("XENBUS: Registering device %s\n", xendev->dev.bus_id);
err = device_register(&xendev->dev);
if (err) {
printk("XENBUS: Registering device %s: error %i\n",
xendev->dev.bus_id, err);
kfree(xendev);
}
-
-free_nodename:
- kfree(nodename);
- dprintf("< err=%i\n", err);
return err;
}
@@ -386,30 +202,19 @@
{
int err = 0;
char **dir;
- char *path;
unsigned int dir_n = 0;
int i;
- dprintf("> dirpath=%s typename=%s\n", dirpath, typename);
- path = xenbus_path(dirpath, typename);
- if (!path)
- return -ENOMEM;
-
- dir = xs_directory(path, &dir_n);
- if (IS_ERR(dir)) {
- err = PTR_ERR(dir);
- goto out;
- }
+ dir = xenbus_directory(dirpath, typename, &dir_n);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
for (i = 0; i < dir_n; i++) {
- err = xenbus_probe_device(path, dir[i], typename);
+ err = xenbus_probe_device(dirpath, typename, dir[i]);
if (err)
break;
}
kfree(dir);
-out:
- kfree(path);
- dprintf("< err=%i\n", err);
return err;
}
@@ -419,198 +224,103 @@
char **dir;
unsigned int i, dir_n;
- dprintf("> path=%s\n", path);
- down(&xs_lock);
- dir = xs_directory(path, &dir_n);
- if (IS_ERR(dir)) {
- err = PTR_ERR(dir);
- goto unlock;
- }
+ dir = xenbus_directory(path, "", &dir_n);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
for (i = 0; i < dir_n; i++) {
err = xenbus_probe_device_type(path, dir[i]);
if (err)
break;
}
kfree(dir);
-unlock:
- up(&xs_lock);
- dprintf("< err=%i\n", err);
return err;
}
-
-static int xenbus_probe_backend(const char *dir, const char *name)
-{
- int err = 0;
- struct xenbus_device *xendev = NULL;
- unsigned int xendev_n = 0;
- char *nodename = NULL, *devicetype = NULL;
- unsigned int devicetype_n = 0;
-
- dprintf("> dir=%s name=%s\n", dir, name);
- nodename = xenbus_path(dir, name);
- if (!nodename)
- return -ENOMEM;
-
- devicetype = xenbus_read(nodename, XENBUS_DEVICE_TYPE, &devicetype_n);
- if (IS_ERR(devicetype)) {
- err = PTR_ERR(devicetype);
- goto free_nodename;
- }
-
- dprintf("> devicetype='%s'\n", devicetype);
- /* FIXME: This could be a rescan. Don't re-register existing devices. */
-
- /* Add space for the strings. */
- xendev_n = sizeof(*xendev) + strlen(nodename) + strlen(devicetype) + 2;
- xendev = kmalloc(xendev_n, GFP_KERNEL);
- if (!xendev) {
- err = -ENOMEM;
- goto free_devicetype;
- }
- memset(xendev, 0, xendev_n);
-
- snprintf(xendev->dev.bus_id, BUS_ID_SIZE, "%s", devicetype);
- xendev->dev.bus = &xenback_type;
-
- /* Copy the strings into the extra space. */
- xendev->nodename = (char *)(xendev + 1);
- strcpy(xendev->nodename, nodename);
- xendev->devicetype = xendev->nodename + strlen(xendev->nodename) + 1;
- strcpy(xendev->devicetype, devicetype);
-
- /* Register with generic device framework. */
- printk("XENBUS: Registering backend %s\n", xendev->dev.bus_id);
- err = device_register(&xendev->dev);
- if (err) {
- printk("XENBUS: Registering device %s: error %i\n",
- xendev->dev.bus_id, err);
- kfree(xendev);
- }
-
-free_devicetype:
- kfree(devicetype);
-free_nodename:
- kfree(nodename);
- dprintf("< err=%i\n", err);
- return err;
-}
-
-static int xenbus_probe_backends(const char *path)
-{
- int err = 0;
- char **dir;
- unsigned int i, dir_n;
-
- dprintf("> path=%s\n", path);
- down(&xs_lock);
- dir = xs_directory(path, &dir_n);
- if (IS_ERR(dir)) {
- err = PTR_ERR(dir);
- goto unlock;
- }
- for (i = 0; i < dir_n; i++) {
- err = xenbus_probe_backend(path, dir[i]);
- if (err)
- break;
- }
- kfree(dir);
-unlock:
- up(&xs_lock);
- dprintf("< err=%i\n", err);
- return err;
-}
-
-int xenbus_register_backend(struct xenbus_driver *drv)
-{
- int err = 0;
-
- printk("%s> backend driver %p %s\n", __FUNCTION__,
- drv, drv->name);
- drv->driver.name = drv->name;
- drv->driver.bus = &xenback_type;
- drv->driver.owner = drv->owner;
- drv->driver.probe = xenbus_dev_probe;
- drv->driver.remove = xenbus_dev_remove;
-
- err = driver_register(&drv->driver);
- if (err == 0 && xs_init_done && drv->connect) {
- printk("%s> connecting driver %p %s\n", __FUNCTION__,
- drv, drv->name);
- drv->connect(drv);
- }
- return err;
-}
-
-void xenbus_unregister_backend(struct xenbus_driver *drv)
-{
- driver_unregister(&drv->driver);
-}
-
-int xenbus_for_each_backend(struct xenbus_driver * start, void * data,
- int (*fn)(struct xenbus_driver *, void *))
-{
- struct xenbus_for_drv for_data = {
- .fn = fn,
- .data = data,
- };
- if (!fn)
- return -EINVAL;
- return bus_for_each_drv(&xenback_type,
- (start ? &start->driver: NULL),
- &for_data, for_drv);
-}
-
-static int xenbus_driver_connect(struct xenbus_driver *drv, void *data)
-{
- printk("%s> driver %p %s\n", __FUNCTION__, drv, drv->name);
- if (drv->connect) {
- printk("%s> connecting driver %p %s\n", __FUNCTION__,
- drv, drv->name);
- drv->connect(drv);
- }
- printk("%s< driver %p %s\n", __FUNCTION__, drv, drv->name);
- return 0;
-}
-
+static unsigned int char_count(const char *str, char c)
+{
+ unsigned int i, ret = 0;
+
+ for (i = 0; str[i]; i++)
+ if (str[i] == c)
+ ret++;
+ return ret;
+}
+
+static void dev_changed(struct xenbus_watch *watch, const char *node)
+{
+ char busid[BUS_ID_SIZE];
+ int exists;
+ struct xenbus_device *dev;
+ char *p;
+
+ /* Node is of form device/<type>/<identifier>[/...] */
+ if (char_count(node, '/') != 2)
+ return;
+
+ /* Created or deleted? */
+ exists = xenbus_exists(node, "");
+
+ p = strchr(node, '/') + 1;
+ if (strlen(p) + 1 > BUS_ID_SIZE) {
+ printk("Device for node %s is too big!\n", node);
+ return;
+ }
+ /* Bus ID is name with / changed to - */
+ strcpy(busid, p);
+ *strchr(busid, '/') = '-';
+
+ dev = xenbus_device_find(busid);
+ printk("xenbus: device %s %s\n", busid, dev ? "exists" : "new");
+ if (dev && !exists) {
+ printk("xenbus: Unregistering device %s\n", busid);
+ /* FIXME: free? */
+ device_unregister(&dev->dev);
+ } else if (!dev && exists) {
+ printk("xenbus: Adding device %s\n", busid);
+ /* Hack bus id back into two strings. */
+ *strrchr(busid, '-') = '\0';
+ xenbus_probe_device("device", busid, busid+strlen(busid)+1);
+ } else
+ printk("xenbus: strange, %s already %s\n", busid,
+ exists ? "exists" : "gone");
+ if (dev)
+ put_device(&dev->dev);
+}
+
+/* We watch for devices appearing and vanishing. */
+static struct xenbus_watch dev_watch = {
+ /* FIXME: Ideally we'd only watch for changes 2 levels deep... */
+ .node = "device",
+ .callback = dev_changed,
+};
/* called from a thread in privcmd/privcmd.c */
int do_xenbus_probe(void *unused)
{
int err = 0;
- printk("%s> xs_init_done=%d\n", __FUNCTION__, xs_init_done);
- if (xs_init_done)
- goto exit;
/* Initialize xenstore comms unless already done. */
printk("store_evtchn = %i\n", xen_start_info.store_evtchn);
err = xs_init();
if (err) {
printk("XENBUS: Error initializing xenstore comms:"
" %i\n", err);
- goto exit;
- }
- xs_init_done = 1;
-
- /* Notify drivers that xenstore has connected. */
- printk("%s> connect drivers...\n", __FUNCTION__);
- xenbus_for_each_drv(NULL, NULL, xenbus_driver_connect);
- printk("%s> connect backends...\n", __FUNCTION__);
- xenbus_for_each_backend(NULL, NULL, xenbus_driver_connect);
-
- /* Enumerate devices and backends in xenstore. */
+ return err;
+ }
+
+ down(&xenbus_lock);
+ /* Enumerate devices in xenstore. */
xenbus_probe_devices("device");
- xenbus_probe_backends("backend");
-
-exit:
- printk("%s< err=%d\n", __FUNCTION__, err);
- return err;
+ /* Watch for changes. */
+ register_xenbus_watch(&dev_watch);
+ up(&xenbus_lock);
+ return 0;
}
static int __init xenbus_probe_init(void)
{
bus_register(&xenbus_type);
- bus_register(&xenback_type);
if (!xen_start_info.store_evtchn)
return 0;
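
dev_changed() above round-trips between store paths and bus ids: a node of the form device/<type>/<id> becomes bus id <type>-<id>, and on hot-plug the '-' is overwritten with a NUL so a single buffer yields the two strings passed to xenbus_probe_device(). A standalone sketch of that string surgery (example values only):

    #include <stdio.h>
    #include <string.h>

    /* Illustration of the bus-id round trip performed in dev_changed(). */
    int main(void)
    {
        const char *node = "device/vif/0";       /* example node; any two-slash path works */
        char busid[32];

        strcpy(busid, strchr(node, '/') + 1);    /* "vif/0"                              */
        *strchr(busid, '/') = '-';               /* "vif-0": the registered bus id       */

        *strrchr(busid, '-') = '\0';             /* split in place: "vif" + "0"          */
        printf("type=%s id=%s\n", busid, busid + strlen(busid) + 1);
        return 0;
    }
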
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Fri Jul 29 20:25:03 2005
@@ -2,7 +2,7 @@
* xenbus_xs.c
*
* This is the kernel equivalent of the "xs" library. We don't need everything
- * and we use xenbus_comms to communication.
+ * and we use xenbus_comms for communication.
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
*
@@ -43,9 +43,9 @@
#define streq(a, b) (strcmp((a), (b)) == 0)
-static void *xs_in, *xs_out;
+static char printf_buffer[4096];
static LIST_HEAD(watches);
-DECLARE_MUTEX(xs_lock);
+DECLARE_MUTEX(xenbus_lock);
static int get_error(const char *errorstring)
{
@@ -68,7 +68,7 @@
void *ret;
int err;
- err = xb_read(xs_in, &msg, sizeof(msg));
+ err = xb_read(&msg, sizeof(msg));
if (err)
return ERR_PTR(err);
@@ -76,7 +76,7 @@
if (!ret)
return ERR_PTR(-ENOMEM);
- err = xb_read(xs_in, ret, msg.len);
+ err = xb_read(ret, msg.len);
if (err) {
kfree(ret);
return ERR_PTR(err);
@@ -90,18 +90,17 @@
}
/* Emergency write. */
-void xs_debug_write(const char *str, unsigned int count)
+void xenbus_debug_write(const char *str, unsigned int count)
{
struct xsd_sockmsg msg;
- void *out = (void *)xen_start_info.store_page;
msg.type = XS_DEBUG;
msg.len = sizeof("print") + count + 1;
- xb_write(out, &msg, sizeof(msg));
- xb_write(out, "print", sizeof("print"));
- xb_write(out, str, count);
- xb_write(out, "", 1);
+ xb_write(&msg, sizeof(msg));
+ xb_write("print", sizeof("print"));
+ xb_write(str, count);
+ xb_write("", 1);
}
/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
@@ -115,19 +114,19 @@
unsigned int i;
int err;
- WARN_ON(down_trylock(&xs_lock) == 0);
+ WARN_ON(down_trylock(&xenbus_lock) == 0);
msg.type = type;
msg.len = 0;
for (i = 0; i < num_vecs; i++)
msg.len += iovec[i].iov_len;
- err = xb_write(xs_out, &msg, sizeof(msg));
+ err = xb_write(&msg, sizeof(msg));
if (err)
return ERR_PTR(err);
for (i = 0; i < num_vecs; i++) {
- err = xb_write(xs_out, iovec[i].iov_base, iovec[i].iov_len);;
+ err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
if (err)
return ERR_PTR(err);
}
@@ -182,12 +181,28 @@
return num;
}
-char **xs_directory(const char *path, unsigned int *num)
+/* Return the path to dir with /name appended. */
+static char *join(const char *dir, const char *name)
+{
+ static char buffer[4096];
+
+ BUG_ON(down_trylock(&xenbus_lock) == 0);
+ BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer));
+
+ strcpy(buffer, dir);
+ if (!streq(name, "")) {
+ strcat(buffer, "/");
+ strcat(buffer, name);
+ }
+ return buffer;
+}
+
+char **xenbus_directory(const char *dir, const char *node, unsigned int *num)
{
char *strings, *p, **ret;
unsigned int len;
- strings = xs_single(XS_DIRECTORY, path, &len);
+ strings = xs_single(XS_DIRECTORY, join(dir, node), &len);
if (IS_ERR(strings))
return (char **)strings;
@@ -210,67 +225,37 @@
}
/* Check if a path exists. Return 1 if it does. */
-int xs_exists(const char *path)
-{
- char **dir;
+int xenbus_exists(const char *dir, const char *node)
+{
+ char **d;
int dir_n;
- dir = xs_directory(path, &dir_n);
- if (IS_ERR(dir))
+ d = xenbus_directory(dir, node, &dir_n);
+ if (IS_ERR(d))
return 0;
- kfree(dir);
+ kfree(d);
return 1;
}
-
-/* Make a directory, creating dirs on the path to it if necessary.
- * Return 0 on success, error code otherwise.
- */
-int xs_mkdirs(const char *path)
-{
- int err = 0;
- char s[strlen(path) + 1], *p = s;
-
- if (xs_exists(path))
- goto out;
- strcpy(p, path);
- if (*p == '/')
- p++;
- for (;;) {
- p = strchr(p, '/');
- if (p)
- *p = '\0';
- if (!xs_exists(s)) {
- err = xs_mkdir(s);
- if (err)
- goto out;
- }
- if (!p)
- break;
- *p++ = '/';
- }
- out:
- return err;
-}
-
/* Get the value of a single file.
* Returns a kmalloced value: call free() on it after use.
* len indicates length in bytes.
*/
-void *xs_read(const char *path, unsigned int *len)
-{
- return xs_single(XS_READ, path, len);
+void *xenbus_read(const char *dir, const char *node, unsigned int *len)
+{
+ return xs_single(XS_READ, join(dir, node), len);
}
/* Write the value of a single file.
* Returns -err on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
*/
-int xs_write(const char *path,
- const void *data, unsigned int len, int createflags)
-{
- const char *flags;
+int xenbus_write(const char *dir, const char *node,
+ const char *string, int createflags)
+{
+ const char *flags, *path;
struct kvec iovec[3];
+ path = join(dir, node);
/* Format: Flags (as string), path, data. */
if (createflags == 0)
flags = XS_WRITE_NONE;
@@ -285,22 +270,22 @@
iovec[0].iov_len = strlen(path) + 1;
iovec[1].iov_base = (void *)flags;
iovec[1].iov_len = strlen(flags) + 1;
- iovec[2].iov_base = (void *)data;
- iovec[2].iov_len = len;
+ iovec[2].iov_base = (void *)string;
+ iovec[2].iov_len = strlen(string);
return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
}
/* Create a new directory. */
-int xs_mkdir(const char *path)
-{
- return xs_error(xs_single(XS_MKDIR, path, NULL));
+int xenbus_mkdir(const char *dir, const char *node)
+{
+ return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL));
}
/* Destroy a file or directory (directories must be empty). */
-int xs_rm(const char *path)
-{
- return xs_error(xs_single(XS_RM, path, NULL));
+int xenbus_rm(const char *dir, const char *node)
+{
+ return xs_error(xs_single(XS_RM, join(dir, node), NULL));
}
/* Start a transaction: changes by others will not be seen during this
@@ -308,7 +293,7 @@
* Transaction only applies to the given subtree.
* You can only have one transaction at any time.
*/
-int xs_transaction_start(const char *subtree)
+int xenbus_transaction_start(const char *subtree)
{
return xs_error(xs_single(XS_TRANSACTION_START, subtree, NULL));
}
@@ -316,7 +301,7 @@
/* End a transaction.
* If abandon is true, transaction is discarded instead of committed.
*/
-int xs_transaction_end(int abort)
+int xenbus_transaction_end(int abort)
{
char abortstr[2];
@@ -327,26 +312,109 @@
return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL));
}
-char *xs_get_domain_path(domid_t domid)
-{
- char domid_str[32];
-
- sprintf(domid_str, "%u", domid);
- return xs_single(XS_GETDOMAINPATH, domid_str, NULL);
-}
-
-static int xs_watch(const char *path, const char *token, unsigned int priority)
-{
- char prio[32];
- struct kvec iov[3];
-
- sprintf(prio, "%u", priority);
+/* Single read and scanf: returns -errno or num scanned. */
+int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+ char *val;
+
+ val = xenbus_read(dir, node, NULL);
+ if (IS_ERR(val))
+ return PTR_ERR(val);
+
+ va_start(ap, fmt);
+ ret = vsscanf(val, fmt, ap);
+ va_end(ap);
+ kfree(val);
+ /* Distinctive errno. */
+ if (ret == 0)
+ return -ERANGE;
+ return ret;
+}
+
+/* Single printf and write: returns -errno or 0. */
+int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ BUG_ON(down_trylock(&xenbus_lock) == 0);
+ va_start(ap, fmt);
+ ret = vsnprintf(printf_buffer, sizeof(printf_buffer), fmt, ap);
+ va_end(ap);
+
+ BUG_ON(ret > sizeof(printf_buffer)-1);
+ return xenbus_write(dir, node, printf_buffer, O_CREAT);
+}
+
+/* Report a (negative) errno into the store, with explanation. */
+void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+ unsigned int len;
+
+ BUG_ON(down_trylock(&xenbus_lock) == 0);
+
+ len = sprintf(printf_buffer, "%i ", -err);
+ va_start(ap, fmt);
+ ret = vsnprintf(printf_buffer+len, sizeof(printf_buffer)-len, fmt, ap);
+ va_end(ap);
+
+ BUG_ON(len + ret > sizeof(printf_buffer)-1);
+ dev->has_error = 1;
+ if (xenbus_write(dev->nodename, "error", printf_buffer, O_CREAT) != 0)
+ printk("xenbus: failed to write error node for %s (%s)\n",
+ dev->nodename, printf_buffer);
+}
+
+/* Clear any error. */
+void xenbus_dev_ok(struct xenbus_device *dev)
+{
+ if (dev->has_error) {
+ if (xenbus_rm(dev->nodename, "error") != 0)
+ printk("xenbus: failed to clear error node for %s\n",
+ dev->nodename);
+ else
+ dev->has_error = 0;
+ }
+}
+
+/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
+int xenbus_gather(const char *dir, ...)
+{
+ va_list ap;
+ const char *name;
+ int ret = 0;
+
+ va_start(ap, dir);
+ while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
+ const char *fmt = va_arg(ap, char *);
+ void *result = va_arg(ap, void *);
+ char *p;
+
+ p = xenbus_read(dir, name, NULL);
+ if (IS_ERR(p)) {
+ ret = PTR_ERR(p);
+ break;
+ }
+ if (sscanf(p, fmt, result) == 0)
+ ret = -EINVAL;
+ kfree(p);
+ }
+ va_end(ap);
+ return ret;
+}
+
+static int xs_watch(const char *path, const char *token)
+{
+ struct kvec iov[2];
+
iov[0].iov_base = (void *)path;
iov[0].iov_len = strlen(path) + 1;
iov[1].iov_base = (void *)token;
iov[1].iov_len = strlen(token) + 1;
- iov[2].iov_base = prio;
- iov[2].iov_len = strlen(prio) + 1;
return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
}
@@ -405,7 +473,7 @@
sprintf(token, "%lX", (long)watch);
BUG_ON(find_watch(token));
- err = xs_watch(watch->node, token, watch->priority);
+ err = xs_watch(watch->node, token);
if (!err)
list_add(&watch->list, &watches);
return err;
@@ -423,45 +491,43 @@
list_del(&watch->list);
if (err)
- printk(KERN_WARNING "XENBUS Failed to release watch %s: %i\n",
+ printk(KERN_WARNING
+ "XENBUS Failed to release watch %s: %i\n",
watch->node, err);
}
static int watch_thread(void *unused)
{
-
for (;;) {
char *token;
char *node = NULL;
- wait_event(xb_waitq, xs_input_avail(xs_in));
+ wait_event(xb_waitq, xs_input_avail());
/* If this is a spurious wakeup caused by someone
* doing an op, they'll hold the lock and the buffer
* will be empty by the time we get there.
*/
- down(&xs_lock);
- if (xs_input_avail(xs_in))
+ down(&xenbus_lock);
+ if (xs_input_avail())
node = xs_read_watch(&token);
if (node && !IS_ERR(node)) {
struct xenbus_watch *w;
int err;
+ err = xs_acknowledge_watch(token);
+ if (err)
+ printk(KERN_WARNING "XENBUS ack %s fail %i\n",
+ node, err);
w = find_watch(token);
BUG_ON(!w);
w->callback(w, node);
- /* FIXME: Only ack if it wasn't deleted. */
- err = xs_acknowledge_watch(token);
- if (err)
- printk(KERN_WARNING
- "XENBUS acknowledge %s failed %i\n",
- node, err);
kfree(node);
} else
printk(KERN_WARNING "XENBUS xs_read_watch: %li\n",
PTR_ERR(node));
- up(&xs_lock);
+ up(&xenbus_lock);
}
}
@@ -470,7 +536,7 @@
int err;
struct task_struct *watcher;
- err = xb_init_comms(&xs_in, &xs_out);
+ err = xb_init_comms();
if (err)
return err;
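
The rewritten xenbus_xs.c gives drivers (dir, node) accessors with printf/scanf semantics; every helper assumes the caller holds xenbus_lock, which the BUG_ON(down_trylock(...)) checks enforce. A hedged usage sketch for a hypothetical frontend node (the "handle" and "state" entries are invented for illustration):

    /* Hypothetical frontend reading and writing its own nodes.
     * Assumes dev->nodename is something like "device/vif/0". */
    static int example_read_config(struct xenbus_device *dev)
    {
        unsigned long handle;
        int err;

        down(&xenbus_lock);                               /* the helpers require the lock */

        err = xenbus_scanf(dev->nodename, "handle", "%lu", &handle);
        if (err < 0) {
            xenbus_dev_error(dev, err, "reading %s/handle", dev->nodename);
            up(&xenbus_lock);
            return err;
        }

        err = xenbus_printf(dev->nodename, "state", "%d", 1 /* initialising */);
        if (err)
            xenbus_dev_error(dev, err, "writing state");
        else
            xenbus_dev_ok(dev);                           /* clear any earlier error node */

        up(&xenbus_lock);
        return err;
    }
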
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h Fri Jul 29 20:25:03 2005
@@ -369,4 +369,6 @@
#endif /* __KERNEL__ */
-#endif
+#define ARCH_HAS_DEV_MEM
+
+#endif
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Fri Jul 29 20:25:03 2005
@@ -97,8 +97,8 @@
void xen_pte_pin(unsigned long ptr);
void xen_pte_unpin(unsigned long ptr);
#else
-#define xen_l1_entry_update(_p, _v) set_pte((_p), (pte_t){(_v)})
-#define xen_l2_entry_update(_p, _v) set_pgd((_p), (pgd_t){(_v)})
+#define xen_l1_entry_update(_p, _v) set_pte((_p), (_v))
+#define xen_l2_entry_update(_p, _v) set_pgd((_p), (_v))
#define xen_pgd_pin(_p) ((void)0)
#define xen_pgd_unpin(_p) ((void)0)
#define xen_pte_pin(_p) ((void)0)
@@ -140,6 +140,14 @@
#endif
#include <asm/hypercall.h>
+
+#if defined(CONFIG_X86_64)
+#define MULTI_UVMFLAGS_INDEX 2
+#define MULTI_UVMDOMID_INDEX 3
+#else
+#define MULTI_UVMFLAGS_INDEX 3
+#define MULTI_UVMDOMID_INDEX 4
+#endif
static inline void
MULTI_update_va_mapping(
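
The two index macros exist because a new pte value occupies two 32-bit multicall arguments on x86_32 but a single argument on x86_64, so the flags and foreign-domid slots land at different offsets. Callers such as the usbback change earlier in this patch pick the slot symbolically; a short sketch of the intended pattern (the flush flags are just the common choice):

    /* Illustrative: tag the final update_va_mapping multicall entry with a
     * TLB-flush request, without hard-coding which arg slot the flags use. */
    static void finish_mapping_batch(multicall_entry_t *mcl, int nr)
    {
        mcl[nr - 1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH | UVMF_ALL;
        if (unlikely(HYPERVISOR_multicall(mcl, nr) != 0))
            BUG();
    }
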
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Fri Jul 29 20:25:03 2005
@@ -36,8 +36,8 @@
char *devicetype;
char *subtype;
char *nodename;
- int id;
struct device dev;
+ int has_error;
void *data;
};
@@ -58,21 +58,10 @@
char *name;
struct module *owner;
const struct xenbus_device_id *ids;
- /* Called when xenstore is connected. */
- int (*connect) (struct xenbus_driver * drv);
-
- int (*probe) (struct xenbus_device * dev, const struct xenbus_device_id * id);
- int (*remove) (struct xenbus_device * dev);
- int (*configure)(struct xenbus_device * dev);
-
+ int (*probe) (struct xenbus_device * dev,
+ const struct xenbus_device_id * id);
+ int (*remove) (struct xenbus_device * dev);
struct device_driver driver;
-};
-
-struct xenbus_evtchn {
- unsigned long dom1;
- unsigned long port1;
- unsigned long dom2;
- unsigned long port2;
};
static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
@@ -83,63 +72,47 @@
int xenbus_register_driver(struct xenbus_driver *drv);
void xenbus_unregister_driver(struct xenbus_driver *drv);
-int xenbus_register_backend(struct xenbus_driver *drv);
-void xenbus_unregister_backend(struct xenbus_driver *drv);
-
-/* Iterator over xenbus devices (frontend). */
-int xenbus_for_each_dev(struct xenbus_device * start, void * data,
- int (*fn)(struct xenbus_device *, void *));
-
-/* Iterator over xenbus drivers (frontend). */
-int xenbus_for_each_drv(struct xenbus_driver * start, void * data,
- int (*fn)(struct xenbus_driver *, void *));
-
-/* Iterator over xenbus drivers (backend). */
-int xenbus_for_each_backend(struct xenbus_driver * start, void * data,
- int (*fn)(struct xenbus_driver *, void *));
-
/* Caller must hold this lock to call these functions: it's also held
* across watch callbacks. */
-extern struct semaphore xs_lock;
+extern struct semaphore xenbus_lock;
-char **xs_directory(const char *path, unsigned int *num);
-void *xs_read(const char *path, unsigned int *len);
-int xs_write(const char *path,
- const void *data, unsigned int len, int createflags);
-int xs_mkdir(const char *path);
-int xs_exists(const char *path);
-int xs_mkdirs(const char *path);
-int xs_rm(const char *path);
-int xs_transaction_start(const char *subtree);
-int xs_transaction_end(int abort);
-char *xs_get_domain_path(domid_t domid);
+char **xenbus_directory(const char *dir, const char *node, unsigned int *num);
+void *xenbus_read(const char *dir, const char *node, unsigned int *len);
+int xenbus_write(const char *dir, const char *node,
+ const char *string, int createflags);
+int xenbus_mkdir(const char *dir, const char *node);
+int xenbus_exists(const char *dir, const char *node);
+int xenbus_rm(const char *dir, const char *node);
+int xenbus_transaction_start(const char *subtree);
+int xenbus_transaction_end(int abort);
+
+/* Single read and scanf: returns -errno or num scanned if > 0. */
+int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
+ __attribute__((format(scanf, 3, 4)));
+
+/* Single printf and write: returns -errno or 0. */
+int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
+ __attribute__((format(printf, 3, 4)));
+
+/* Generic read function: NULL-terminated triples of name,
+ * sprintf-style type string, and pointer. Returns 0 or errno.*/
+int xenbus_gather(const char *dir, ...);
+
+/* Report a (negative) errno into the store, with explanation. */
+void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,...);
+
+/* Clear any error. */
+void xenbus_dev_ok(struct xenbus_device *dev);
/* Register callback to watch this node. */
struct xenbus_watch
{
struct list_head list;
char *node;
- unsigned int priority;
void (*callback)(struct xenbus_watch *, const char *node);
};
int register_xenbus_watch(struct xenbus_watch *watch);
void unregister_xenbus_watch(struct xenbus_watch *watch);
-/* Generic read function: NULL-terminated triples of name,
- * sprintf-style type string, and pointer. */
-int xenbus_gather(const char *dir, ...);
-
-char *xenbus_path(const char *dir, const char *name);
-char *xenbus_read(const char *dir, const char *name, unsigned int *data_n);
-int xenbus_write(const char *dir, const char *name,
- const char *data, int data_n);
-
-int xenbus_read_string(const char *dir, const char *name, char **val);
-int xenbus_write_string(const char *dir, const char *name, const char *val);
-int xenbus_read_ulong(const char *dir, const char *name, unsigned long *val);
-int xenbus_write_ulong(const char *dir, const char *name, unsigned long val);
-int xenbus_read_long(const char *dir, const char *name, long *val);
-int xenbus_write_long(const char *dir, const char *name, long val);
-
#endif /* _ASM_XEN_XENBUS_H */
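
The slimmed-down xenbus_watch drops the priority field: a watch is now just a node prefix plus a callback, registered and fired with xenbus_lock held (dev_watch in xenbus_probe.c above is the in-tree example). A minimal hypothetical watch, with an invented node path, might look like:

    /* Hypothetical watch on a single node (names and path invented). */
    static void example_changed(struct xenbus_watch *watch, const char *node)
    {
        printk("xenbus: %s changed\n", node);
    }

    static struct xenbus_watch example_watch = {
        .node     = "device/vif/0/state",
        .callback = example_changed,
    };

    /* During setup, with xenbus_lock held:
     *     register_xenbus_watch(&example_watch);
     */
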
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/include/linux/mm.h
--- a/linux-2.6-xen-sparse/include/linux/mm.h Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/include/linux/mm.h Fri Jul 29 20:25:03 2005
@@ -816,10 +816,6 @@
extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
int remap_pfn_range(struct vm_area_struct *, unsigned long,
unsigned long, unsigned long, pgprot_t);
-/* Allow arch override for mapping of device and I/O (non-RAM) pages. */
-#ifndef io_remap_pfn_range
-#define io_remap_pfn_range remap_pfn_range
-#endif
#ifdef CONFIG_PROC_FS
void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
diff -r a4196568095c -r b53a65034532 patches/linux-2.6.12/smp-alts.patch
--- a/patches/linux-2.6.12/smp-alts.patch Fri Jul 29 18:52:33 2005
+++ b/patches/linux-2.6.12/smp-alts.patch Fri Jul 29 20:25:03 2005
@@ -1,7 +1,7 @@
-diff -Naur linux-2.6.11/arch/i386/Kconfig linux-2.6.11.post/arch/i386/Kconfig
---- linux-2.6.11/arch/i386/Kconfig 2005-03-02 07:37:49.000000000 +0000
-+++ linux-2.6.11.post/arch/i386/Kconfig 2005-06-10 13:42:35.000000000 +0100
-@@ -481,6 +481,19 @@
+diff -Naur linux-2.6.12/arch/i386/Kconfig linux-2.6.12.post/arch/i386/Kconfig
+--- linux-2.6.12/arch/i386/Kconfig 2005-06-17 15:48:29.000000000 -0400
++++ linux-2.6.12.post/arch/i386/Kconfig 2005-07-25 05:51:21.000000000 -0400
+@@ -487,6 +487,19 @@
If you don't know what to do here, say N.
@@ -21,10 +21,10 @@
config NR_CPUS
int "Maximum number of CPUs (2-255)"
range 2 255
-diff -Naur linux-2.6.11/arch/i386/kernel/Makefile linux-2.6.11.post/arch/i386/kernel/Makefile
---- linux-2.6.11/arch/i386/kernel/Makefile 2005-03-02 07:37:49.000000000 +0000
-+++ linux-2.6.11.post/arch/i386/kernel/Makefile 2005-06-16 11:16:18.555332435 +0100
-@@ -32,6 +32,7 @@
+diff -Naur linux-2.6.12/arch/i386/kernel/Makefile linux-2.6.12.post/arch/i386/kernel/Makefile
+--- linux-2.6.12/arch/i386/kernel/Makefile 2005-06-17 15:48:29.000000000 -0400
++++ linux-2.6.12.post/arch/i386/kernel/Makefile 2005-07-25 05:51:21.000000000 -0400
+@@ -33,6 +33,7 @@
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
@@ -32,9 +32,9 @@
EXTRA_AFLAGS := -traditional
-diff -Naur linux-2.6.11/arch/i386/kernel/smpalts.c linux-2.6.11.post/arch/i386/kernel/smpalts.c
---- linux-2.6.11/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100
-+++ linux-2.6.11.post/arch/i386/kernel/smpalts.c 2005-06-16 11:23:39.300902424 +0100
+diff -Naur linux-2.6.12/arch/i386/kernel/smpalts.c linux-2.6.12.post/arch/i386/kernel/smpalts.c
+--- linux-2.6.12/arch/i386/kernel/smpalts.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.12.post/arch/i386/kernel/smpalts.c 2005-07-25 05:51:21.000000000 -0400
@@ -0,0 +1,85 @@
+#include <linux/kernel.h>
+#include <asm/system.h>
@@ -121,10 +121,10 @@
+ asm volatile ("jmp 1f\n1:");
+ mb();
+}
-diff -Naur linux-2.6.11/arch/i386/kernel/smpboot.c linux-2.6.11.post/arch/i386/kernel/smpboot.c
---- linux-2.6.11/arch/i386/kernel/smpboot.c 2005-03-02 07:38:09.000000000 +0000
-+++ linux-2.6.11.post/arch/i386/kernel/smpboot.c 2005-06-16 11:17:09.287064617 +0100
-@@ -1003,6 +1003,11 @@
+diff -Naur linux-2.6.12/arch/i386/kernel/smpboot.c linux-2.6.12.post/arch/i386/kernel/smpboot.c
+--- linux-2.6.12/arch/i386/kernel/smpboot.c 2005-06-17 15:48:29.000000000 -0400
++++ linux-2.6.12.post/arch/i386/kernel/smpboot.c 2005-07-25 05:51:21.000000000 -0400
+@@ -1001,6 +1001,11 @@
if (max_cpus <= cpucount+1)
continue;
@@ -136,7 +136,7 @@
if (do_boot_cpu(apicid))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
-@@ -1118,6 +1123,11 @@
+@@ -1130,6 +1135,11 @@
return -EIO;
}
@@ -148,9 +148,9 @@
local_irq_enable();
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
-diff -Naur linux-2.6.11/arch/i386/kernel/vmlinux.lds.S linux-2.6.11.post/arch/i386/kernel/vmlinux.lds.S
---- linux-2.6.11/arch/i386/kernel/vmlinux.lds.S 2005-03-02 07:38:37.000000000 +0000
-+++ linux-2.6.11.post/arch/i386/kernel/vmlinux.lds.S 2005-06-10 11:14:14.000000000 +0100
+diff -Naur linux-2.6.12/arch/i386/kernel/vmlinux.lds.S linux-2.6.12.post/arch/i386/kernel/vmlinux.lds.S
+--- linux-2.6.12/arch/i386/kernel/vmlinux.lds.S 2005-06-17 15:48:29.000000000 -0400
++++ linux-2.6.12.post/arch/i386/kernel/vmlinux.lds.S 2005-07-25 05:51:21.000000000 -0400
@@ -30,6 +30,13 @@
__ex_table : { *(__ex_table) }
__stop___ex_table = .;
@@ -165,9 +165,9 @@
RODATA
/* writeable */
-diff -Naur linux-2.6.11/include/asm-i386/atomic.h linux-2.6.11.post/include/asm-i386/atomic.h
---- linux-2.6.11/include/asm-i386/atomic.h 2005-03-02 07:37:51.000000000 +0000
-+++ linux-2.6.11.post/include/asm-i386/atomic.h 2005-06-13 10:10:39.000000000 +0100
+diff -Naur linux-2.6.12/include/asm-i386/atomic.h linux-2.6.12.post/include/asm-i386/atomic.h
+--- linux-2.6.12/include/asm-i386/atomic.h 2005-06-17 15:48:29.000000000 -0400
++++ linux-2.6.12.post/include/asm-i386/atomic.h 2005-07-25 05:51:21.000000000 -0400
@@ -4,18 +4,13 @@
#include <linux/config.h>
#include <linux/compiler.h>
@@ -188,9 +188,9 @@
/*
* Make sure gcc doesn't try to be clever and move things around
* on us. We need to use _exactly_ the address the user gave us,
-diff -Naur linux-2.6.11/include/asm-i386/bitops.h linux-2.6.11.post/include/asm-i386/bitops.h
---- linux-2.6.11/include/asm-i386/bitops.h 2005-03-02 07:38:12.000000000 +0000
-+++ linux-2.6.11.post/include/asm-i386/bitops.h 2005-06-13 10:11:54.000000000 +0100
+diff -Naur linux-2.6.12/include/asm-i386/bitops.h linux-2.6.12.post/include/asm-i386/bitops.h
+--- linux-2.6.12/include/asm-i386/bitops.h 2005-06-17 15:48:29.000000000 -0400
++++ linux-2.6.12.post/include/asm-i386/bitops.h 2005-07-25 05:51:21.000000000 -0400
@@ -7,6 +7,7 @@
#include <linux/config.h>
@@ -266,9 +266,9 @@
"btcl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"Ir" (nr) : "memory");
-diff -Naur linux-2.6.11/include/asm-i386/rwsem.h linux-2.6.11.post/include/asm-i386/rwsem.h
---- linux-2.6.11/include/asm-i386/rwsem.h 2005-03-02 07:38:08.000000000 +0000
-+++ linux-2.6.11.post/include/asm-i386/rwsem.h 2005-06-13 10:13:06.000000000 +0100
+diff -Naur linux-2.6.12/include/asm-i386/rwsem.h linux-2.6.12.post/include/asm-i386/rwsem.h
+--- linux-2.6.12/include/asm-i386/rwsem.h 2005-06-17 15:48:29.000000000 -0400
++++ linux-2.6.12.post/include/asm-i386/rwsem.h 2005-07-25 05:51:21.000000000 -0400
@@ -40,6 +40,7 @@
#include <linux/list.h>
@@ -349,9 +349,9 @@
: "+r"(tmp), "=m"(sem->count)
: "r"(sem), "m"(sem->count)
: "memory");
-diff -Naur linux-2.6.11/include/asm-i386/smp_alt.h linux-2.6.11.post/include/asm-i386/smp_alt.h
---- linux-2.6.11/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100
-+++ linux-2.6.11.post/include/asm-i386/smp_alt.h 2005-06-16 11:16:50.109433206 +0100
+diff -Naur linux-2.6.12/include/asm-i386/smp_alt.h linux-2.6.12.post/include/asm-i386/smp_alt.h
+--- linux-2.6.12/include/asm-i386/smp_alt.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.12.post/include/asm-i386/smp_alt.h 2005-07-25 05:51:21.000000000 -0400
@@ -0,0 +1,32 @@
+#ifndef __ASM_SMP_ALT_H__
+#define __ASM_SMP_ALT_H__
@@ -385,9 +385,9 @@
+#endif
+
+#endif /* __ASM_SMP_ALT_H__ */
-diff -Naur linux-2.6.11/include/asm-i386/spinlock.h linux-2.6.11.post/include/asm-i386/spinlock.h
---- linux-2.6.11/include/asm-i386/spinlock.h 2005-03-02 07:37:50.000000000 +0000
-+++ linux-2.6.11.post/include/asm-i386/spinlock.h 2005-06-13 14:13:52.000000000 +0100
+diff -Naur linux-2.6.12/include/asm-i386/spinlock.h linux-2.6.12.post/include/asm-i386/spinlock.h
+--- linux-2.6.12/include/asm-i386/spinlock.h 2005-06-17 15:48:29.000000000 -0400
++++ linux-2.6.12.post/include/asm-i386/spinlock.h 2005-07-25 05:51:21.000000000 -0400
@@ -6,6 +6,7 @@
#include <asm/page.h>
#include <linux/config.h>
@@ -467,9 +467,9 @@
static inline int _raw_read_trylock(rwlock_t *lock)
{
-diff -Naur linux-2.6.11/include/asm-i386/system.h linux-2.6.11.post/include/asm-i386/system.h
---- linux-2.6.11/include/asm-i386/system.h 2005-03-02 07:37:30.000000000 +0000
-+++ linux-2.6.11.post/include/asm-i386/system.h 2005-06-15 13:21:40.000000000 +0100
+diff -Naur linux-2.6.12/include/asm-i386/system.h linux-2.6.12.post/include/asm-i386/system.h
+--- linux-2.6.12/include/asm-i386/system.h 2005-06-17 15:48:29.000000000 -0400
++++ linux-2.6.12.post/include/asm-i386/system.h 2005-07-25 05:51:21.000000000 -0400
@@ -5,7 +5,7 @@
#include <linux/kernel.h>
#include <asm/segment.h>
diff -r a4196568095c -r b53a65034532 tools/debugger/gdb/gdbbuild
--- a/tools/debugger/gdb/gdbbuild Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/gdb/gdbbuild Fri Jul 29 20:25:03 2005
@@ -1,6 +1,6 @@
#!/bin/sh
-XENROOT=`bk root`
+XENROOT=`hg root`
export XENROOT
cd $XENROOT/tools/debugger/gdb
diff -r a4196568095c -r b53a65034532 tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/libxendebug/xendebug.c Fri Jul 29 20:25:03 2005
@@ -42,7 +42,6 @@
struct list_head list;
memory_t address;
u32 domain;
- u16 vcpu;
u8 old_value; /* old value for software bkpt */
} bwcpoint_t, *bwcpoint_p;
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/PDB.ml
--- a/tools/debugger/pdb/PDB.ml Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/PDB.ml Fri Jul 29 20:25:03 2005
@@ -7,9 +7,12 @@
* @version 1
*)
+open Util
+
exception Unimplemented of string
exception Unknown_context of string
exception Unknown_domain
+exception Unknown_process
type context_t =
| Void
@@ -44,6 +47,31 @@
let delete_context key =
Hashtbl.remove hash key
+
+
+(**
+ find_process : Locate the socket associated with the context(s)
+ matching a particular (domain, process id) pair. If there are multiple
+ contexts (there shouldn't be), then return the first one.
+ *)
+
+let find_process dom pid =
+ let find key ctx list =
+ match ctx with
+ | Process p ->
+ if (((Process.get_domain p) = dom) &&
+ ((Process.get_process p) = pid))
+ then
+ key :: list
+ else
+ list
+ | _ -> list
+ in
+ let sock_list = Hashtbl.fold find hash [] in
+ match sock_list with
+ | hd::tl -> hd
+ | [] -> raise Unknown_process
+
(**
find_domain : Locate the socket associated with the context(s)
@@ -98,18 +126,25 @@
begin
let xdom_sock = find_xen_domain_context (Process.get_domain p) in
let xdom_ctx = find_context xdom_sock in
- match xdom_ctx with
- | Xen_domain d ->
- Process.attach_debugger p d
- | _ -> failwith ("context has wrong xen domain type")
+ begin
+ match xdom_ctx with
+ | Xen_domain d ->
+ Process.attach_debugger p d
+ | _ -> failwith ("context has wrong xen domain type")
+ end;
+ raise No_reply
end
| _ -> raise (Unimplemented "attach debugger")
let detach_debugger ctx =
match ctx with
- | Domain d -> Domain.detach_debugger (Domain.get_domain d)
- (Domain.get_vcpu d)
- | Process p -> Process.detach_debugger p
+ | Domain d ->
+ Domain.detach_debugger (Domain.get_domain d)
+ (Domain.get_vcpu d);
+ "OK"
+ | Process p ->
+ Process.detach_debugger p;
+ raise No_reply
| _ -> raise (Unimplemented "detach debugger")
@@ -158,8 +193,8 @@
match params with
| dom::pid::_ ->
let p = Process(Process.new_context dom pid) in
- attach_debugger p;
- Hashtbl.replace hash key p
+ Hashtbl.replace hash key p;
+ attach_debugger p
| _ -> failwith "bogus parameters to process context"
end
| "xen domain"
@@ -188,26 +223,42 @@
match ctx with
| Void -> Intel.null_registers (* default for startup *)
| Domain d -> Domain.read_registers d
- | Process p -> Process.read_registers p
+ | Process p ->
+ begin
+ Process.read_registers p;
+ raise No_reply
+ end
| _ -> raise (Unimplemented "read registers")
let write_register ctx register value =
match ctx with
| Domain d -> Domain.write_register d register value
- | Process p -> Process.write_register p register value
+ | Process p ->
+ begin
+ Process.write_register p register value;
+ raise No_reply
+ end
| _ -> raise (Unimplemented "write register")
let read_memory ctx addr len =
match ctx with
| Domain d -> Domain.read_memory d addr len
- | Process p -> Process.read_memory p addr len
+ | Process p ->
+ begin
+ Process.read_memory p addr len;
+ raise No_reply
+ end
| _ -> raise (Unimplemented "read memory")
let write_memory ctx addr values =
match ctx with
| Domain d -> Domain.write_memory d addr values
- | Process p -> Process.write_memory p addr values
+ | Process p ->
+ begin
+ Process.write_memory p addr values;
+ raise No_reply
+ end
| _ -> raise (Unimplemented "write memory")
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Process.ml
--- a/tools/debugger/pdb/Process.ml Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/Process.ml Fri Jul 29 20:25:03 2005
@@ -54,10 +54,10 @@
proc_ctx.ring <- Xen_domain.get_ring dom_ctx;
_attach_debugger proc_ctx
-external read_registers : context_t -> registers = "proc_read_registers"
+external read_registers : context_t -> unit = "proc_read_registers"
external write_register : context_t -> register -> int32 -> unit =
"proc_write_register"
-external read_memory : context_t -> int32 -> int -> int list =
+external read_memory : context_t -> int32 -> int -> unit =
"proc_read_memory"
"proc_read_memory"
external write_memory : context_t -> int32 -> int list -> unit =
"proc_write_memory"
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Process.mli
--- a/tools/debugger/pdb/Process.mli Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/Process.mli Fri Jul 29 20:25:03 2005
@@ -27,9 +27,9 @@
val pause : context_t -> unit
-val read_registers : context_t -> registers
+val read_registers : context_t -> unit
val write_register : context_t -> register -> int32 -> unit
-val read_memory : context_t -> int32 -> int -> int list
+val read_memory : context_t -> int32 -> int -> unit
val write_memory : context_t -> int32 -> int list -> unit
val continue : context_t -> unit
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Util.ml
--- a/tools/debugger/pdb/Util.ml Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/Util.ml Fri Jul 29 20:25:03 2005
@@ -154,3 +154,12 @@
* BUG NEED TO LISTEN FOR REPLY +/- AND POSSIBLY RE-TRANSMIT
*)
+
+(** A few debugger commands such as step 's' and continue 'c' do
+ * not immediately return a response to the debugger. In these
+ * cases we raise No_reply instead.
+ * This is also used by some contexts (such as Linux processes)
+ * which utilize an asynchronous request / response protocol when
+ * communicating with their respective backends.
+ *)
+exception No_reply
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Xen_domain.ml
--- a/tools/debugger/pdb/Xen_domain.ml Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/Xen_domain.ml Fri Jul 29 20:25:03 2005
@@ -40,4 +40,4 @@
let string_of_context ctx =
Printf.sprintf "{xen domain assist} domain: %d" ctx.domain
-external process_response : int32 -> unit = "process_handle_response"
+external process_response : int32 -> int * int * string = "process_handle_response"
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Xen_domain.mli
--- a/tools/debugger/pdb/Xen_domain.mli Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/Xen_domain.mli Fri Jul 29 20:25:03 2005
@@ -21,5 +21,5 @@
val string_of_context : context_t -> string
-val process_response : int32 -> unit
+val process_response : int32 -> int * int * string
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/debugger.ml
--- a/tools/debugger/pdb/debugger.ml Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/debugger.ml Fri Jul 29 20:25:03 2005
@@ -12,12 +12,6 @@
open Util
open Str
-(** a few debugger commands such as step 's' and continue 'c' do
- * not immediately return a response to the debugger. in these
- * cases we raise No_reply instead.
- *)
-exception No_reply
-
let initialize_debugger () =
()
@@ -31,8 +25,7 @@
hash. It will be cleaned up when the socket is closed.
*)
let gdb_detach ctx =
- PDB.detach_debugger ctx;
- raise No_reply
+ PDB.detach_debugger ctx
(**
Kill Command
@@ -295,12 +288,17 @@
let channel = Evtchn.read fd in
let ctx = find_context fd in
+ let (dom, pid, str) =
begin
match ctx with
| Xen_domain d -> Xen_domain.process_response (Xen_domain.get_ring d)
| _ -> failwith ("process_xen_domain called without Xen_domain context")
- end;
-
+ end
+ in
+ let sock = PDB.find_process dom pid in
+ print_endline (Printf.sprintf "(linux) dom:%d pid:%d %s %s"
+ dom pid str (Util.get_connection_info sock));
+ Util.send_reply sock str;
Evtchn.unmask fd channel (* allow next virq *)
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/Makefile
--- a/tools/debugger/pdb/linux-2.6-module/Makefile Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/linux-2.6-module/Makefile Fri Jul 29 20:25:03 2005
@@ -1,4 +1,4 @@
-XEN_ROOT=../../../..
+XEN_ROOT = ../../../..
LINUX_DIR = linux-2.6.12-xenU
KDIR = $(XEN_ROOT)/$(LINUX_DIR)
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/debug.c
--- a/tools/debugger/pdb/linux-2.6-module/debug.c Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/linux-2.6-module/debug.c Fri Jul 29 20:25:03 2005
@@ -3,49 +3,54 @@
* pdb debug functionality for processes.
*/
-
#include <linux/module.h>
+#include <linux/mm.h>
#include <linux/sched.h>
+#include <asm-i386/kdebug.h>
+#include <asm-xen/asm-i386/processor.h>
#include <asm-xen/asm-i386/ptrace.h>
#include <asm-xen/xen-public/xen.h>
-
#include "pdb_module.h"
-
-EXPORT_SYMBOL(pdb_attach);
-EXPORT_SYMBOL(pdb_detach);
-
-int
-pdb_attach (int pid)
-{
- struct task_struct *target;
+#include "pdb_debug.h"
+
+#define BWC_DEBUG 1
+#define BWC_INT3 3
+typedef struct bwcpoint /* break/watch/catch point */
+{
+ struct list_head list;
+ memory_t address;
+ u32 domain;
+ u32 process;
+ u8 old_value; /* old value for software bkpt */
+ u8 type; /* BWC_??? */
+} bwcpoint_t, *bwcpoint_p;
+
+static bwcpoint_t bwcpoint_list;
+
+void
+pdb_initialize_bwcpoint (void)
+{
+ memset((void *) &bwcpoint_list, 0, sizeof(bwcpoint_t));
+ INIT_LIST_HEAD(&bwcpoint_list.list);
+
+ return;
+}
+
+
+int
+pdb_suspend (struct task_struct *target)
+{
u32 rc = 0;
- printk ("pdb attach: 0x%x\n", pid);
-
- read_lock(&tasklist_lock);
- target = find_task_by_pid(pid);
- if (target)
- get_task_struct(target);
- read_unlock(&tasklist_lock);
-
force_sig(SIGSTOP, target); /* force_sig_specific ??? */
return rc;
}
int
-pdb_detach (int pid)
-{
- int rc = 0;
- struct task_struct *target;
-
- printk ("pdb detach: 0x%x\n", pid);
-
- read_lock(&tasklist_lock);
- target = find_task_by_pid(pid);
- if (target)
- get_task_struct(target);
- read_unlock(&tasklist_lock);
+pdb_resume (struct task_struct *target)
+{
+ int rc = 0;
wake_up_process(target);
@@ -55,106 +60,350 @@
/*
* from linux-2.6.11/arch/i386/kernel/ptrace.c::getreg()
*/
-int
-pdb_read_register (int pid, pdb_op_rd_reg_p op, unsigned long *dest)
-{
- int rc = 0;
- struct task_struct *target;
+static unsigned long
+_pdb_get_register (struct task_struct *target, int reg)
+{
+ unsigned long result = ~0UL;
unsigned long offset;
unsigned char *stack = 0L;
- *dest = ~0UL;
-
- read_lock(&tasklist_lock);
- target = find_task_by_pid(pid);
- if (target)
- get_task_struct(target);
- read_unlock(&tasklist_lock);
-
- switch (op->reg)
- {
- case FS:
- *dest = target->thread.fs;
- break;
- case GS:
- *dest = target->thread.gs;
- break;
- case DS:
- case ES:
- case SS:
- case CS:
- *dest = 0xffff;
+ switch (reg)
+ {
+ case LINUX_FS:
+ result = target->thread.fs;
+ break;
+ case LINUX_GS:
+ result = target->thread.gs;
+ break;
+ case LINUX_DS:
+ case LINUX_ES:
+ case LINUX_SS:
+ case LINUX_CS:
+ result = 0xffff;
/* fall through */
default:
- if (op->reg > GS)
- op->reg -= 2;
-
- offset = op->reg * sizeof(long);
+ if (reg > LINUX_GS)
+ reg -= 2;
+
+ offset = reg * sizeof(long);
offset -= sizeof(struct pt_regs);
stack = (unsigned char *)target->thread.esp0;
stack += offset;
- *dest &= *((int *)stack);
- }
-
- /*
- printk ("pdb read register: 0x%x %2d 0x%p 0x%lx\n",
- pid, op->reg, stack, *dest);
- */
-
- return rc;
+ result &= *((int *)stack);
+ }
+
+ return result;
}
/*
* from linux-2.6.11/arch/i386/kernel/ptrace.c::putreg()
*/
-int
-pdb_write_register (int pid, pdb_op_wr_reg_p op)
-{
- int rc = 0;
- struct task_struct *target;
+static void
+_pdb_set_register (struct task_struct *target, int reg, unsigned long val)
+{
unsigned long offset;
unsigned char *stack;
- unsigned long value = op->value;
-
- /*
- printk ("pdb write register: 0x%x %2d 0x%lx\n", pid, op->reg, value);
- */
-
- read_lock(&tasklist_lock);
- target = find_task_by_pid(pid);
- if (target)
- get_task_struct(target);
- read_unlock(&tasklist_lock);
-
- switch (op->reg)
- {
- case FS:
+ unsigned long value = val;
+
+ switch (reg)
+ {
+ case LINUX_FS:
target->thread.fs = value;
- return rc;
- case GS:
+ return;
+ case LINUX_GS:
target->thread.gs = value;
- return rc;
- case DS:
- case ES:
+ return;
+ case LINUX_DS:
+ case LINUX_ES:
value &= 0xffff;
break;
- case SS:
- case CS:
+ case LINUX_SS:
+ case LINUX_CS:
value &= 0xffff;
break;
- case EFL:
- break;
- }
-
- if (op->reg > GS)
- op->reg -= 2;
- offset = op->reg * sizeof(long);
+ case LINUX_EFL:
+ break;
+ }
+
+ if (reg > LINUX_GS)
+ reg -= 2;
+ offset = reg * sizeof(long);
offset -= sizeof(struct pt_regs);
stack = (unsigned char *)target->thread.esp0;
stack += offset;
- *(unsigned long *) stack = op->value;
-
- return rc;
+ *(unsigned long *) stack = value;
+
+ return;
+}
+
+int
+pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op)
+{
+ int rc = 0;
+
+ op->reg[ 0] = _pdb_get_register(target, LINUX_EAX);
+ op->reg[ 1] = _pdb_get_register(target, LINUX_ECX);
+ op->reg[ 2] = _pdb_get_register(target, LINUX_EDX);
+ op->reg[ 3] = _pdb_get_register(target, LINUX_EBX);
+ op->reg[ 4] = _pdb_get_register(target, LINUX_ESP);
+ op->reg[ 5] = _pdb_get_register(target, LINUX_EBP);
+ op->reg[ 6] = _pdb_get_register(target, LINUX_ESI);
+ op->reg[ 7] = _pdb_get_register(target, LINUX_EDI);
+ op->reg[ 8] = _pdb_get_register(target, LINUX_EIP);
+ op->reg[ 9] = _pdb_get_register(target, LINUX_EFL);
+
+ op->reg[10] = _pdb_get_register(target, LINUX_CS);
+ op->reg[11] = _pdb_get_register(target, LINUX_SS);
+ op->reg[12] = _pdb_get_register(target, LINUX_DS);
+ op->reg[13] = _pdb_get_register(target, LINUX_ES);
+ op->reg[14] = _pdb_get_register(target, LINUX_FS);
+ op->reg[15] = _pdb_get_register(target, LINUX_GS);
+
+ return rc;
+}
+
+int
+pdb_write_register (struct task_struct *target, pdb_op_wr_reg_p op)
+{
+ int rc = 0;
+
+ _pdb_set_register(target, op->reg, op->value);
+
+ return rc;
+}
+
+int
+pdb_access_memory (struct task_struct *target, unsigned long address,
+ void *buffer, int length, int write)
+{
+ int rc = 0;
+
+ access_process_vm(target, address, buffer, length, write);
+
+ return rc;
+}
+
+int
+pdb_continue (struct task_struct *target)
+{
+ int rc = 0;
+ unsigned long eflags;
+
+ eflags = _pdb_get_register(target, LINUX_EFL);
+ eflags &= ~X86_EFLAGS_TF;
+ _pdb_set_register(target, LINUX_EFL, eflags);
+
+ wake_up_process(target);
+
+ return rc;
+}
+
+int
+pdb_step (struct task_struct *target)
+{
+ int rc = 0;
+ unsigned long eflags;
+ bwcpoint_p bkpt;
+
+ eflags = _pdb_get_register(target, LINUX_EFL);
+ eflags |= X86_EFLAGS_TF;
+ _pdb_set_register(target, LINUX_EFL, eflags);
+
+ bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL);
+ if ( bkpt == NULL )
+ {
+ printk("error: unable to allocation memory\n");
+ return -1;
+ }
+
+ bkpt->process = target->pid;
+ bkpt->address = 0;
+ bkpt->type = BWC_DEBUG;
+
+ list_add(&bkpt->list, &bwcpoint_list.list);
+
+ wake_up_process(target);
+
+ return rc;
+}
+
+int
+pdb_insert_memory_breakpoint (struct task_struct *target,
+ memory_t address, u32 length)
+{
+ int rc = 0;
+ bwcpoint_p bkpt;
+ u8 breakpoint_opcode = 0xcc;
+
+ printk("insert breakpoint %d:%lx len: %d\n", target->pid, address, length);
+
+ bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL);
+ if ( bkpt == NULL )
+ {
+ printk("error: unable to allocation memory\n");
+ return -1;
+ }
+
+ if ( length != 1 )
+ {
+ printk("error: breakpoint length should be 1\n");
+ kfree(bkpt);
+ return -1;
+ }
+
+ bkpt->process = target->pid;
+ bkpt->address = address;
+ bkpt->type = BWC_INT3;
+
+ pdb_access_memory(target, address, &bkpt->old_value, 1, 0);
+ pdb_access_memory(target, address, &breakpoint_opcode, 1, 1);
+
+ list_add(&bkpt->list, &bwcpoint_list.list);
+
+ printk("breakpoint_set %d:%lx OLD: 0x%x\n",
+ target->pid, address, bkpt->old_value);
+
+ return rc;
+}
+
+int
+pdb_remove_memory_breakpoint (struct task_struct *target,
+ memory_t address, u32 length)
+{
+ int rc = 0;
+ bwcpoint_p bkpt = NULL;
+
+ printk ("remove breakpoint %d:%lx\n", target->pid, address);
+
+ struct list_head *entry;
+ list_for_each(entry, &bwcpoint_list.list)
+ {
+ bkpt = list_entry(entry, bwcpoint_t, list);
+ if ( target->pid == bkpt->process &&
+ address == bkpt->address &&
+ bkpt->type == BWC_INT3 )
+ break;
+ }
+
+ if (bkpt == &bwcpoint_list || bkpt == NULL)
+ {
+ printk ("error: no breakpoint found\n");
+ return -1;
+ }
+
+ list_del(&bkpt->list);
+
+ pdb_access_memory(target, address, &bkpt->old_value, 1, 1);
+
+ kfree(bkpt);
+
+ return rc;
+}
+
+
+/***************************************************************/
+
+int
+pdb_exceptions_notify (struct notifier_block *self, unsigned long val,
+ void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+
+ switch (val)
+ {
+ case DIE_DEBUG:
+ if (pdb_debug_fn(args->regs, args->trapnr, args->err))
+ return NOTIFY_STOP;
+ break;
+ case DIE_TRAP:
+ if (args->trapnr == 3 && pdb_int3_fn(args->regs, args->err))
+ return NOTIFY_STOP;
+ break;
+ case DIE_INT3: /* without kprobes, we should never see DIE_INT3 */
+ case DIE_GPF:
+ case DIE_PAGE_FAULT:
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+
+int
+pdb_debug_fn (struct pt_regs *regs, long error_code,
+ unsigned int condition)
+{
+ pdb_response_t resp;
+ bwcpoint_p bkpt = NULL;
+
+ struct list_head *entry;
+ list_for_each(entry, &bwcpoint_list.list)
+ {
+ bkpt = list_entry(entry, bwcpoint_t, list);
+ if ( current->pid == bkpt->process &&
+ bkpt->type == BWC_DEBUG )
+ break;
+ }
+
+ if (bkpt == &bwcpoint_list || bkpt == NULL)
+ {
+ printk("not my debug 0x%x 0x%lx\n", current->pid, regs->eip);
+ return 0;
+ }
+
+ list_del(&bkpt->list);
+
+ pdb_suspend(current);
+
+ printk("(pdb) debug pid: %d, eip: 0x%08lx\n", current->pid, regs->eip);
+
+ regs->eflags &= ~X86_EFLAGS_TF;
+ set_tsk_thread_flag(current, TIF_SINGLESTEP);
+
+ resp.operation = PDB_OPCODE_STEP;
+ resp.process = current->pid;
+ resp.status = PDB_RESPONSE_OKAY;
+
+ pdb_send_response(&resp);
+
+ return 1;
+}
+
+
+int
+pdb_int3_fn (struct pt_regs *regs, long error_code)
+{
+ pdb_response_t resp;
+ bwcpoint_p bkpt = NULL;
+
+ struct list_head *entry;
+ list_for_each(entry, &bwcpoint_list.list)
+ {
+ bkpt = list_entry(entry, bwcpoint_t, list);
+ if ( current->pid == bkpt->process &&
+ regs->eip == bkpt->address &&
+ bkpt->type == BWC_INT3 )
+ break;
+ }
+
+ if (bkpt == &bwcpoint_list || bkpt == NULL)
+ {
+ printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, regs->eip);
+ return 0;
+ }
+
+ printk("(pdb) int3 pid: %d, eip: 0x%08lx\n", current->pid, regs->eip);
+
+ pdb_suspend(current);
+
+ resp.operation = PDB_OPCODE_CONTINUE;
+ resp.process = current->pid;
+ resp.status = PDB_RESPONSE_OKAY;
+
+ pdb_send_response(&resp);
+
+ return 1;
}
/*
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/module.c
--- a/tools/debugger/pdb/linux-2.6-module/module.c Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/linux-2.6-module/module.c Fri Jul 29 20:25:03 2005
@@ -11,6 +11,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <asm-i386/kdebug.h>
+
#include <asm-xen/evtchn.h>
#include <asm-xen/ctrl_if.h>
#include <asm-xen/hypervisor.h>
@@ -20,17 +22,23 @@
#include <asm-xen/xen-public/io/ring.h>
#include "pdb_module.h"
+#include "pdb_debug.h"
#define PDB_RING_SIZE __RING_SIZE((pdb_sring_t *)0, PAGE_SIZE)
static pdb_back_ring_t pdb_ring;
static unsigned int pdb_evtchn;
static unsigned int pdb_irq;
+static unsigned int pdb_domain;
+
+/* work queue */
+static void pdb_work_handler(void *unused);
+static DECLARE_WORK(pdb_deferred_work, pdb_work_handler, NULL);
/*
* send response to a pdb request
*/
-static void
+void
pdb_send_response (pdb_response_t *response)
{
pdb_response_t *resp;
@@ -38,6 +46,7 @@
resp = RING_GET_RESPONSE(&pdb_ring, pdb_ring.rsp_prod_pvt);
memcpy(resp, response, sizeof(pdb_response_t));
+ resp->domain = pdb_domain;
wmb(); /* Ensure other side can see the response fields. */
pdb_ring.rsp_prod_pvt++;
@@ -53,42 +62,98 @@
pdb_process_request (pdb_request_t *request)
{
pdb_response_t resp;
+ struct task_struct *target;
+
+ read_lock(&tasklist_lock);
+ target = find_task_by_pid(request->process);
+ if (target)
+ get_task_struct(target);
+ read_unlock(&tasklist_lock);
+
+ resp.operation = request->operation;
+ resp.process = request->process;
+
+ if (!target)
+ {
+ printk ("(linux) target not found 0x%x\n", request->process);
+ resp.status = PDB_RESPONSE_ERROR;
+ goto response;
+ }
switch (request->operation)
{
+ case PDB_OPCODE_PAUSE :
+ pdb_suspend(target);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
case PDB_OPCODE_ATTACH :
- pdb_attach(request->process);
+ pdb_suspend(target);
+ pdb_domain = request->u.attach.domain;
+ printk("(linux) attach dom:0x%x pid:0x%x\n",
+ pdb_domain, request->process);
resp.status = PDB_RESPONSE_OKAY;
break;
case PDB_OPCODE_DETACH :
- pdb_detach(request->process);
- resp.status = PDB_RESPONSE_OKAY;
- break;
- case PDB_OPCODE_RD_REG :
- pdb_read_register(request->process, &request->u.rd_reg,
- (unsigned long *)&resp.value);
+ pdb_resume(target);
+ printk("(linux) detach 0x%x\n", request->process);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
+ case PDB_OPCODE_RD_REGS :
+ pdb_read_registers(target, &resp.u.rd_regs);
resp.status = PDB_RESPONSE_OKAY;
break;
case PDB_OPCODE_WR_REG :
- pdb_write_register(request->process, &request->u.wr_reg);
+ pdb_write_register(target, &request->u.wr_reg);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
+ case PDB_OPCODE_RD_MEM :
+ pdb_access_memory(target, request->u.rd_mem.address,
+ &resp.u.rd_mem.data, request->u.rd_mem.length, 0);
+ resp.u.rd_mem.address = request->u.rd_mem.address;
+ resp.u.rd_mem.length = request->u.rd_mem.length;
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
+ case PDB_OPCODE_WR_MEM :
+ pdb_access_memory(target, request->u.wr_mem.address,
+ &request->u.wr_mem.data, request->u.wr_mem.length, 1);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
+ case PDB_OPCODE_CONTINUE :
+ pdb_continue(target);
+ goto no_response;
+ break;
+ case PDB_OPCODE_STEP :
+ pdb_step(target);
+ resp.status = PDB_RESPONSE_OKAY;
+ goto no_response;
+ break;
+ case PDB_OPCODE_SET_BKPT :
+ pdb_insert_memory_breakpoint(target, request->u.bkpt.address,
+ request->u.bkpt.length);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
+ case PDB_OPCODE_CLR_BKPT :
+ pdb_remove_memory_breakpoint(target, request->u.bkpt.address,
+ request->u.bkpt.length);
resp.status = PDB_RESPONSE_OKAY;
break;
default:
printk("(pdb) unknown request operation %d\n", request->operation);
resp.status = PDB_RESPONSE_ERROR;
}
-
- resp.operation = request->operation;
-
+
+ response:
pdb_send_response (&resp);
+
+ no_response:
return;
}
/*
- * receive a pdb request
- */
-static irqreturn_t
-pdb_interrupt (int irq, void *dev_id, struct pt_regs *ptregs)
+ * work queue
+ */
+static void
+pdb_work_handler (void *unused)
{
pdb_request_t *req;
RING_IDX i, rp;
@@ -105,10 +170,18 @@
}
pdb_ring.req_cons = i;
+}
+
+/*
+ * receive a pdb request
+ */
+static irqreturn_t
+pdb_interrupt (int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ schedule_work(&pdb_deferred_work);
return IRQ_HANDLED;
}
-
static void
pdb_send_connection_status(int status, memory_t ring)
@@ -135,8 +208,6 @@
static void
pdb_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
-printk ("pdb ctrlif rx\n");
-
switch (msg->subtype)
{
case CMSG_DEBUG_CONNECTION_STATUS:
@@ -160,17 +231,34 @@
return;
}
+
+/********************************************************************/
+
+static struct notifier_block pdb_exceptions_nb =
+{
+ .notifier_call = pdb_exceptions_notify,
+ .priority = 0x1 /* low priority */
+};
+
+
static int __init
-pdb_initialize(void)
-{
+pdb_initialize (void)
+{
+ int err;
pdb_sring_t *sring;
printk("----\npdb initialize %s %s\n", __DATE__, __TIME__);
+
+ pdb_initialize_bwcpoint();
/*
if ( xen_start_info.flags & SIF_INITDOMAIN )
return 1;
*/
+
+ pdb_evtchn = 0;
+ pdb_irq = 0;
+ pdb_domain = 0;
(void)ctrl_if_register_receiver(CMSG_DEBUG, pdb_ctrlif_rx,
CALLBACK_IN_BLOCKING_CONTEXT);
@@ -184,12 +272,21 @@
pdb_send_connection_status(PDB_CONNECTION_STATUS_UP,
virt_to_machine(pdb_ring.sring) >> PAGE_SHIFT);
- return 0;
-}
+ /* handler for int1 & int3 */
+ err = register_die_notifier(&pdb_exceptions_nb);
+
+ return err;
+}
+
+extern struct notifier_block *i386die_chain;
+extern spinlock_t die_notifier_lock;
static void __exit
pdb_terminate(void)
{
+ int err = 0;
+ unsigned long flags;
+
printk("pdb cleanup\n");
(void)ctrl_if_unregister_receiver(CMSG_DEBUG, pdb_ctrlif_rx);
@@ -207,6 +304,12 @@
}
pdb_send_connection_status(PDB_CONNECTION_STATUS_DOWN, 0);
+
+ spin_lock_irqsave(&die_notifier_lock, flags);
+ err = notifier_chain_unregister(&i386die_chain, &pdb_exceptions_nb);
+ spin_unlock_irqrestore(&die_notifier_lock, flags);
+
+ return;
}
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/pdb_module.h
--- a/tools/debugger/pdb/linux-2.6-module/pdb_module.h Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/linux-2.6-module/pdb_module.h Fri Jul 29 20:25:03 2005
@@ -1,34 +1,80 @@
-#ifndef __XEN_PDB_H_
-#define __XEN_PDB_H_
+#ifndef __PDB_MODULE_H_
+#define __PDB_MODULE_H_
-#define PDB_OPCODE_ATTACH 1
-#define PDB_OPCODE_DETACH 2
+#include "../pdb_caml_xen.h"
-#define PDB_OPCODE_RD_REG 3
-typedef struct pdb_op_rd_reg
+#define PDB_OPCODE_PAUSE 1
+
+#define PDB_OPCODE_ATTACH 2
+typedef struct pdb_op_attach
{
- u32 reg;
-} pdb_op_rd_reg_t, *pdb_op_rd_reg_p;
+ u32 domain;
+} pdb_op_attach_t, *pdb_op_attach_p;
-#define PDB_OPCODE_WR_REG 4
+#define PDB_OPCODE_DETACH 3
+
+#define PDB_OPCODE_RD_REGS 4
+typedef struct pdb_op_rd_regs
+{
+ u32 reg[GDB_REGISTER_FRAME_SIZE];
+} pdb_op_rd_regs_t, *pdb_op_rd_regs_p;
+
+#define PDB_OPCODE_WR_REG 5
typedef struct pdb_op_wr_reg
{
u32 reg;
u32 value;
} pdb_op_wr_reg_t, *pdb_op_wr_reg_p;
+#define PDB_OPCODE_RD_MEM 6
+typedef struct pdb_op_rd_mem_req
+{
+ u32 address;
+ u32 length;
+} pdb_op_rd_mem_req_t, *pdb_op_rd_mem_req_p;
+
+typedef struct pdb_op_rd_mem_resp
+{
+ u32 address;
+ u32 length;
+ u8 data[1024];
+} pdb_op_rd_mem_resp_t, *pdb_op_rd_mem_resp_p;
+
+#define PDB_OPCODE_WR_MEM 7
+typedef struct pdb_op_wr_mem
+{
+ u32 address;
+ u32 length;
+ u8 data[1024]; /* arbitrary */
+} pdb_op_wr_mem_t, *pdb_op_wr_mem_p;
+
+#define PDB_OPCODE_CONTINUE 8
+#define PDB_OPCODE_STEP 9
+
+#define PDB_OPCODE_SET_BKPT 10
+#define PDB_OPCODE_CLR_BKPT 11
+typedef struct pdb_op_bkpt
+{
+ u32 address;
+ u32 length;
+} pdb_op_bkpt_t, *pdb_op_bkpt_p;
+
+
typedef struct
{
u8 operation; /* PDB_OPCODE_??? */
- u32 domain;
u32 process;
union
{
- pdb_op_rd_reg_t rd_reg;
- pdb_op_wr_reg_t wr_reg;
+ pdb_op_attach_t attach;
+ pdb_op_wr_reg_t wr_reg;
+ pdb_op_rd_mem_req_t rd_mem;
+ pdb_op_wr_mem_t wr_mem;
+ pdb_op_bkpt_t bkpt;
} u;
} pdb_request_t, *pdb_request_p;
+
#define PDB_RESPONSE_OKAY 0
@@ -36,19 +82,18 @@
typedef struct {
u8 operation; /* copied from request */
+ u32 domain;
+ u32 process;
s16 status; /* PDB_RESPONSE_??? */
- u32 value;
+ union
+ {
+ pdb_op_rd_regs_t rd_regs;
+ pdb_op_rd_mem_resp_t rd_mem;
+ } u;
} pdb_response_t, *pdb_response_p;
DEFINE_RING_TYPES(pdb, pdb_request_t, pdb_response_t);
-
-
-int pdb_attach (int pid);
-int pdb_detach (int pid);
-int pdb_read_register (int pid, pdb_op_rd_reg_p op, unsigned long *dest);
-int pdb_write_register (int pid, pdb_op_wr_reg_p op);
-
#endif
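(Aside, not part of this changeset: a minimal sketch of how a front end might fill in one of the request structures defined above, mirroring proc_read_memory() in pdb_caml_process.c further down. The variables ctx, addr and len are assumed to be in scope; send_request() is the helper already used throughout pdb_caml_process.c.)

    /* Illustrative sketch only, not part of this patch: build a
     * PDB_OPCODE_RD_MEM request using the pdb_module.h structures. */
    pdb_request_t req;

    req.operation        = PDB_OPCODE_RD_MEM;
    req.process          = ctx.process;        /* target process id     */
    req.u.rd_mem.address = addr;               /* guest virtual address */
    req.u.rd_mem.length  = len;                /* at most 1024 bytes    */

    send_request(ctx.ring, ctx.evtchn, &req);  /* ring + event channel  */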
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/pdb_caml_process.c
--- a/tools/debugger/pdb/pdb_caml_process.c Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/pdb_caml_process.c Fri Jul 29 20:25:03 2005
@@ -66,49 +66,127 @@
}
/*
- * read a response from a pdb domain backend.
+ * process_handle_response : int32 -> int * int * string
*
- * grabs the response off a ring.
- */
-static void
-read_response (pdb_front_ring_t *pdb_ring, pdb_response_p response)
-{
- RING_IDX loop, rp;
-
- rp = pdb_ring->sring->rsp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- for ( loop = pdb_ring->rsp_cons; loop != rp; loop++ )
+ * A backend domain has notified pdb (via an event channel)
+ * that a command has finished.
+ * We read the result from the channel and formulate a response
+ * as a single string. Also return the domain and process.
+ */
+
+static inline unsigned int
+_flip (unsigned int orig)
+{
+ return (((orig << 24) & 0xff000000) | ((orig << 8) & 0x00ff0000) |
+ ((orig >> 8) & 0x0000ff00) | ((orig >> 24) & 0x000000ff));
+}
+
+value
+process_handle_response (value ring)
+{
+ CAMLparam1(ring);
+ CAMLlocal2(result, str);
+
+ RING_IDX rp;
+ pdb_response_p resp;
+ pdb_front_ring_t *my_ring = (pdb_front_ring_t *)Int32_val(ring);
+ char msg[2048];
+ int msglen;
+
+ memset(msg, 0, sizeof(msg));
+
+ rp = my_ring->sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ /* default response is OK unless the command has something
+ more interesting to say */
+ sprintf(msg, "OK");
+
+ if (my_ring->rsp_cons != rp)
{
- pdb_response_p resp;
-
- resp = RING_GET_RESPONSE(pdb_ring, loop);
- memcpy(response, resp, sizeof(pdb_response_t));
-
- /*
- printf ("got response %x %x %x\n", response->operation,
- response->status, response->value);
- */
+ resp = RING_GET_RESPONSE(my_ring, my_ring->rsp_cons);
+
+ switch (resp->operation)
+ {
+ case PDB_OPCODE_PAUSE :
+ case PDB_OPCODE_ATTACH :
+ case PDB_OPCODE_DETACH :
+ break;
+
+ case PDB_OPCODE_RD_REGS :
+ {
+ int loop;
+ pdb_op_rd_regs_p regs = &resp->u.rd_regs;
+
+ for (loop = 0; loop < GDB_REGISTER_FRAME_SIZE * 8; loop += 8)
+ {
+ sprintf(&msg[loop], "%08x", _flip(regs->reg[loop >> 3]));
+ }
+
+ break;
+ }
+ case PDB_OPCODE_WR_REG :
+ {
+ /* should check the return status */
+ break;
+ }
+
+ case PDB_OPCODE_RD_MEM :
+ {
+ int loop;
+ pdb_op_rd_mem_resp_p mem = &resp->u.rd_mem;
+
+ for (loop = 0; loop < mem->length; loop ++)
+ {
+ sprintf(&msg[loop * 2], "%02x", mem->data[loop]);
+ }
+ break;
+ }
+ case PDB_OPCODE_WR_MEM :
+ {
+ /* should check the return status */
+ break;
+ }
+
+ /* this is equivalent to process_xen_virq */
+ case PDB_OPCODE_CONTINUE :
+ {
+ sprintf(msg, "S05");
+ break;
+ }
+ case PDB_OPCODE_STEP :
+ {
+ sprintf(msg, "S05");
+ break;
+ }
+
+ case PDB_OPCODE_SET_BKPT :
+ {
+ break;
+ }
+ case PDB_OPCODE_CLR_BKPT :
+ {
+ break;
+ }
+
+ default :
+ printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE\n");
+ break;
+ }
+
+ my_ring->rsp_cons++;
}
- pdb_ring->rsp_cons = loop;
-}
-
-/*
- * process_handle_response : int32 -> unit
- */
-
-value
-process_handle_response (value ring)
-{
- CAMLparam1(ring);
-
- pdb_front_ring_t *my_ring = (pdb_front_ring_t *)Int32_val(ring);
- pdb_response_t resp;
-
- if ( my_ring )
- read_response(my_ring, &resp);
-
- CAMLreturn(Val_unit);
+
+ msglen = strlen(msg);
+ result = caml_alloc(3,0);
+ str = alloc_string(msglen);
+ memmove(&Byte(str,0), msg, msglen);
+
+ Store_field(result, 0, Val_int(resp->domain));
+ Store_field(result, 1, Val_int(resp->process));
+ Store_field(result, 2, str);
+
+ CAMLreturn(result);
}
/*
@@ -120,27 +198,14 @@
CAMLparam1(context);
context_t ctx;
pdb_request_t req;
- pdb_response_t resp;
-
- decode_context(&ctx, context);
-
- printf("(pdb) attach process [%d.%d] %d %p\n", ctx.domain, ctx.process,
- ctx.evtchn, ctx.ring);
- fflush(stdout);
+
+ decode_context(&ctx, context);
req.operation = PDB_OPCODE_ATTACH;
- req.domain = ctx.domain;
+ req.u.attach.domain = ctx.domain;
req.process = ctx.process;
send_request (ctx.ring, ctx.evtchn, &req);
-
- printf("awaiting response\n");
- fflush(stdout);
-
- read_response (ctx.ring, &resp);
-
- printf("response %d %d\n", resp.operation, resp.status);
- fflush(stdout);
CAMLreturn(Val_unit);
}
@@ -163,7 +228,6 @@
fflush(stdout);
req.operation = PDB_OPCODE_DETACH;
- req.domain = ctx.domain;
req.process = ctx.process;
send_request (ctx.ring, ctx.evtchn, &req);
@@ -180,67 +244,41 @@
{
CAMLparam1(context);
context_t ctx;
+ pdb_request_t req;
decode_context(&ctx, context);
printf("(pdb) pause target %d %d\n", ctx.domain, ctx.process);
fflush(stdout);
- CAMLreturn(Val_unit);
-}
-
-
-/*
- * proc_read_registers : context_t -> int32
+ req.operation = PDB_OPCODE_PAUSE;
+ req.process = ctx.process;
+
+ send_request (ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
+}
+
+
+/*
+ * proc_read_registers : context_t -> unit
*/
value
proc_read_registers (value context)
{
CAMLparam1(context);
- CAMLlocal1(result);
-
- u32 regs[REGISTER_FRAME_SIZE];
-
- pdb_request_t req;
- context_t ctx;
- int loop;
-
- decode_context(&ctx, context);
-
- req.operation = PDB_OPCODE_RD_REG;
- req.domain = ctx.domain;
- req.process = ctx.process;
-
- for (loop = 0; loop < REGISTER_FRAME_SIZE; loop++)
- {
- pdb_response_t resp;
-
- req.u.rd_reg.reg = loop;
- send_request(ctx.ring, ctx.evtchn, &req);
- read_response(ctx.ring, &resp);
- regs[loop] = resp.value;
- }
-
- result = caml_alloc_tuple(16);
-
- Store_field(result, 0, caml_copy_int32(regs[LINUX_EAX]));
- Store_field(result, 1, caml_copy_int32(regs[LINUX_ECX]));
- Store_field(result, 2, caml_copy_int32(regs[LINUX_EDX]));
- Store_field(result, 3, caml_copy_int32(regs[LINUX_EBX]));
- Store_field(result, 4, caml_copy_int32(regs[LINUX_ESP]));
- Store_field(result, 5, caml_copy_int32(regs[LINUX_EBP]));
- Store_field(result, 6, caml_copy_int32(regs[LINUX_ESI]));
- Store_field(result, 7, caml_copy_int32(regs[LINUX_EDI]));
- Store_field(result, 8, caml_copy_int32(regs[LINUX_EIP]));
- Store_field(result, 9, caml_copy_int32(regs[LINUX_EFL]));
- Store_field(result, 10, caml_copy_int32(regs[LINUX_CS])); /* 16 */
- Store_field(result, 11, caml_copy_int32(regs[LINUX_SS])); /* 16 */
- Store_field(result, 12, caml_copy_int32(regs[LINUX_DS])); /* 16 */
- Store_field(result, 13, caml_copy_int32(regs[LINUX_ES])); /* 16 */
- Store_field(result, 14, caml_copy_int32(regs[LINUX_FS])); /* 16 */
- Store_field(result, 15, caml_copy_int32(regs[LINUX_GS])); /* 16 */
-
- CAMLreturn(result);
+
+ pdb_request_t req;
+ context_t ctx;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_RD_REGS;
+ req.process = ctx.process;
+
+ send_request (ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
}
@@ -257,12 +295,10 @@
context_t ctx;
pdb_request_t req;
- pdb_response_t resp;
decode_context(&ctx, context);
req.operation = PDB_OPCODE_WR_REG;
- req.domain = ctx.domain;
req.process = ctx.process;
req.u.wr_reg.value = my_newval;
@@ -290,71 +326,34 @@
}
send_request(ctx.ring, ctx.evtchn, &req);
- read_response(ctx.ring, &resp);
-
- CAMLreturn(Val_unit);
-}
-
-
-/*
- * proc_read_memory : context_t -> int32 -> int -> int
+
+ CAMLreturn(Val_unit);
+}
+
+
+/*
+ * proc_read_memory : context_t -> int32 -> int -> unit
*/
value
proc_read_memory (value context, value address, value length)
{
CAMLparam3(context, address, length);
- CAMLlocal2(result, temp);
-
- context_t ctx;
- int loop;
- char *buffer;
- /* memory_t my_address = Int32_val(address); */
- u32 my_length = Int_val(length);
-
- printf ("(pdb) read memory\n");
-
- decode_context(&ctx, context);
-
- buffer = malloc(my_length);
- if ( buffer == NULL )
- {
- printf("(pdb) read memory: malloc failed.\n"); fflush(stdout);
- failwith("read memory error");
- }
-
- /*
- if ( xendebug_read_memory(xc_handle, ctx.domain, ctx.vcpu,
- my_address, my_length, buffer) )
- {
- printf("(pdb) read memory error!\n"); fflush(stdout);
- failwith("read memory error");
- }
- */
-
- memset(buffer, 0xff, my_length);
-
- result = caml_alloc(2,0);
- if ( my_length > 0 ) /* car */
- {
- Store_field(result, 0, Val_int(buffer[my_length - 1] & 0xff));
- }
- else
-
- {
- Store_field(result, 0, Val_int(0));
- }
- Store_field(result, 1, Val_int(0)); /* cdr */
-
- for (loop = 1; loop < my_length; loop++)
- {
- temp = result;
- result = caml_alloc(2,0);
- Store_field(result, 0, Val_int(buffer[my_length - loop - 1] & 0xff));
- Store_field(result, 1, temp);
- }
-
- CAMLreturn(result);
-}
+
+ context_t ctx;
+ pdb_request_t req;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_RD_MEM;
+ req.process = ctx.process;
+ req.u.rd_mem.address = Int32_val(address);
+ req.u.rd_mem.length = Int_val(length);
+
+ send_request(ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
+}
+
/*
* proc_write_memory : context_t -> int32 -> int list -> unit
@@ -366,50 +365,37 @@
CAMLlocal1(node);
context_t ctx;
-
- char buffer[4096]; /* a big buffer */
- memory_t my_address;
+ pdb_request_t req;
u32 length = 0;
- printf ("(pdb) write memory\n");
-
- decode_context(&ctx, context);
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_WR_MEM;
+ req.process = ctx.process;
node = val_list;
if ( Int_val(node) == 0 ) /* gdb functionalty test uses empty list */
{
- CAMLreturn(Val_unit);
+ req.u.wr_mem.address = Int32_val(address);
+ req.u.wr_mem.length = 0;
}
-
- while ( Int_val(Field(node,1)) != 0 )
+ else
{
- buffer[length++] = Int_val(Field(node, 0));
- node = Field(node,1);
+ while ( Int_val(Field(node,1)) != 0 )
+ {
+ req.u.wr_mem.data[length++] = Int_val(Field(node, 0));
+ node = Field(node,1);
+ }
+ req.u.wr_mem.data[length++] = Int_val(Field(node, 0));
+
+ req.u.wr_mem.address = Int32_val(address);
+ req.u.wr_mem.length = length;
}
- buffer[length++] = Int_val(Field(node, 0));
-
- my_address = (memory_t) Int32_val(address);
-
- /*
- if ( xendebug_write_memory(xc_handle, ctx.domain, ctx.vcpu,
- my_address, length, buffer) )
- {
- printf("(pdb) write memory error!\n"); fflush(stdout);
- failwith("write memory error");
- }
- */
- {
- int loop;
- for (loop = 0; loop < length; loop++)
- {
- printf (" %02x", buffer[loop]);
- }
- printf ("\n");
- }
-
- CAMLreturn(Val_unit);
-}
-
+
+ send_request(ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
+}
/*
@@ -421,17 +407,14 @@
CAMLparam1(context);
context_t ctx;
-
- decode_context(&ctx, context);
-
- /*
- if ( xendebug_continue(xc_handle, ctx.domain, ctx.vcpu) )
- {
- printf("(pdb) continue\n"); fflush(stdout);
- failwith("continue");
- }
- */
- printf ("CONTINUE\n");
+ pdb_request_t req;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_CONTINUE;
+ req.process = ctx.process;
+
+ send_request(ctx.ring, ctx.evtchn, &req);
CAMLreturn(Val_unit);
}
@@ -445,17 +428,14 @@
CAMLparam1(context);
context_t ctx;
-
- decode_context(&ctx, context);
-
- /*
- if ( xendebug_step(xc_handle, ctx.domain, ctx.vcpu) )
- {
- printf("(pdb) step\n"); fflush(stdout);
- failwith("step");
- }
- */
- printf ("STEP\n");
+ pdb_request_t req;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_STEP;
+ req.process = ctx.process;
+
+ send_request(ctx.ring, ctx.evtchn, &req);
CAMLreturn(Val_unit);
}
@@ -471,22 +451,16 @@
CAMLparam3(context, address, length);
context_t ctx;
- memory_t my_address = (memory_t) Int32_val(address);
- int my_length = Int_val(length);
-
- decode_context(&ctx, context);
-
- printf ("(pdb) insert memory breakpoint 0x%lx %d\n",
- my_address, my_length);
-
- /*
- if ( xendebug_insert_memory_breakpoint(xc_handle, ctx.domain, ctx.vcpu,
- my_address, my_length) )
- {
- printf("(pdb) error: insert memory breakpoint\n"); fflush(stdout);
- failwith("insert memory breakpoint");
- }
- */
+ pdb_request_t req;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_SET_BKPT;
+ req.process = ctx.process;
+ req.u.bkpt.address = (memory_t) Int32_val(address);
+ req.u.bkpt.length = Int_val(length);
+
+ send_request(ctx.ring, ctx.evtchn, &req);
CAMLreturn(Val_unit);
}
@@ -500,24 +474,16 @@
CAMLparam3(context, address, length);
context_t ctx;
-
- memory_t my_address = (memory_t) Int32_val(address);
- int my_length = Int_val(length);
-
- printf ("(pdb) remove memory breakpoint 0x%lx %d\n",
- my_address, my_length);
-
- decode_context(&ctx, context);
-
- /*
- if ( xendebug_remove_memory_breakpoint(xc_handle,
- ctx.domain, ctx.vcpu,
- my_address, my_length) )
- {
- printf("(pdb) error: remove memory breakpoint\n"); fflush(stdout);
- failwith("remove memory breakpoint");
- }
- */
+ pdb_request_t req;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_CLR_BKPT;
+ req.process = ctx.process;
+ req.u.bkpt.address = (memory_t) Int32_val(address);
+ req.u.bkpt.length = Int_val(length);
+
+ send_request(ctx.ring, ctx.evtchn, &req);
CAMLreturn(Val_unit);
}
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/pdb_caml_xcs.c
--- a/tools/debugger/pdb/pdb_caml_xcs.c Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/pdb_caml_xcs.c Fri Jul 29 20:25:03 2005
@@ -201,7 +201,7 @@
ret = connect(control_fd, (struct sockaddr *)&addr, len);
if (ret < 0)
{
- printf("error connecting to xcs(ctrl)! (%d)\n", errno);
+ printf("error connecting to xcs (ctrl)! (%d)\n", errno);
goto ctrl_fd_fail;
}
@@ -235,7 +235,7 @@
ret = connect(data_fd, (struct sockaddr *)&addr, len);
if (ret < 0)
{
- printf("error connecting to xcs(data)! (%d)\n", errno);
+ printf("error connecting to xcs (data)! (%d)\n", errno);
goto data_fd_fail;
}
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/pdb_caml_xen.h
--- a/tools/debugger/pdb/pdb_caml_xen.h Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/pdb_caml_xen.h Fri Jul 29 20:25:03 2005
@@ -10,11 +10,12 @@
#ifndef _PDB_CAML_XEN_DEFINED_
#define _PDB_CAML_XEN_DEFINED_
-enum gdb_registers { GDB_EAX, GDB_ECX, GDB_EDX, GDB_EBX,
- GDB_ESP, GDB_EBP, GDB_ESI, GDB_EDI,
- GDB_EIP, GDB_EFL,
- GDB_CS, GDB_SS, GDB_DS, GDB_ES,
- GDB_FS, GDB_GS };
+enum gdb_registers { /* 32 */ GDB_EAX, GDB_ECX, GDB_EDX, GDB_EBX,
+ GDB_ESP, GDB_EBP, GDB_ESI, GDB_EDI,
+ GDB_EIP, GDB_EFL,
+ /* 16 */ GDB_CS, GDB_SS, GDB_DS, GDB_ES,
+ GDB_FS, GDB_GS };
+#define GDB_REGISTER_FRAME_SIZE 16
/* this order comes from linux-2.6.11/include/asm-i386/ptrace.h */
enum x86_registers { LINUX_EBX, LINUX_ECX, LINUX_EDX, LINUX_ESI, LINUX_EDI,
@@ -24,7 +25,11 @@
#define REGISTER_FRAME_SIZE 17
+/* hack: this is also included from the pdb linux module which
+ has PAGE_SIZE defined */
+#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
+#endif
extern int xc_handle;
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/readme
--- a/tools/debugger/pdb/readme Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/readme Fri Jul 29 20:25:03 2005
@@ -31,7 +31,7 @@
Build the target domains with debugging symbols.
make CONFIG_DEBUG_INFO=true CONFIG_FRAME_POINTER=false linux-2.6-xenU-build
- You can also change linux-2.6.11-xenU/Makefile
+ You can also change linux-2.6.12-xenU/Makefile
CONFIG_CC_OPTIMIZE_FOR_SIZE from -O2 to -O
- Build PDB
@@ -46,7 +46,7 @@
domain-0.xeno# ./pdb <port>
- Run GDB
- hostname% gdb <xeno.bk>/dist/install/boot/vmlinux-syms-2.6.11.11-xenU
+ hostname% gdb <xeno.bk>/dist/install/boot/vmlinux-syms-2.6.12-xenU
(gdb) target remote domain-0.xeno:<port>
@@ -76,9 +76,18 @@
continue
print
+Process
+
+ PDB can also debug a process running in a Linux 2.6 domain.
+ After running PDB in domain 0, insert the pdb module in the domU:
+
+ % insmod linux-2.6-module/pdb.ko
+
+ Load GDB with the appropriate symbols, and attach with
+
+ (gdb) maint packet x context = process <domid> <pid>
To Do
- watchpoints
- support for SMP
-- support for user applications
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/server.ml
--- a/tools/debugger/pdb/server.ml Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/server.ml Fri Jul 29 20:25:03 2005
@@ -9,7 +9,7 @@
open Unix
open Buffer
-
+open Util
(**
* connection_t: The state for each connection.
@@ -98,7 +98,7 @@
(String.escaped reply));
Util.send_reply sock reply
with
- Debugger.No_reply ->
+ Util.No_reply ->
print_endline (Printf.sprintf "[%s] %s -> null"
(Util.get_connection_info sock)
(String.escaped command))
diff -r a4196568095c -r b53a65034532 tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp Fri Jul 29 18:52:33 2005
+++ b/tools/examples/xend-config.sxp Fri Jul 29 20:25:03 2005
@@ -44,3 +44,11 @@
# Setup script for enbd-backed block devices
(block-enbd block-enbd)
+# Dom0 will balloon out when needed to free memory for domU.
+# dom0-min-mem is the lowest memory level (in MB) dom0 will get down to.
+# If dom0-min-mem=0, dom0 will never balloon out.
+(dom0-min-mem 0)
+
+# On an SMP system, dom0 will use only the CPUs in the range [1,dom0-cpus].
+# If dom0-cpus = 0, dom0 will use all available CPUs.
+(dom0-cpus 0)
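(For illustration only, not part of this changeset; the values are hypothetical. A host that should keep at least 196MB of memory for dom0 and restrict dom0 to two CPUs would set:)

    (dom0-min-mem 196)
    (dom0-cpus 2)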
diff -r a4196568095c -r b53a65034532 tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c Fri Jul 29 18:52:33 2005
+++ b/tools/firmware/rombios/rombios.c Fri Jul 29 20:25:03 2005
@@ -28,6 +28,11 @@
#define VMXASSIST
#undef VMXTEST
+
+// Xen full virtualization does not handle unaligned IO with page crossing.
+// Disable 32-bit PIO as a workaround.
+#define NO_PIO32
+
// ROM BIOS compatability entry points:
// ===================================
@@ -2248,6 +2253,9 @@
Bit16u cylinders, heads, spt, blksize;
Bit8u translation, removable, mode;
+ // default mode to PIO16
+ mode = ATA_MODE_PIO16;
+
//Temporary values to do the transfer
write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
@@ -2256,7 +2264,10 @@
BX_PANIC("ata-detect: Failed to detect ATA device\n");
removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
+#ifndef NO_PIO32
mode = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16;
+#endif
+
blksize = read_word(get_SS(),buffer+10);
cylinders = read_word(get_SS(),buffer+(1*2)); // word 1
@@ -2346,6 +2357,9 @@
Bit8u type, removable, mode;
Bit16u blksize;
+ // default mode to PIO16
+ mode = ATA_MODE_PIO16;
+
//Temporary values to do the transfer
write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_CDROM);
write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
@@ -2355,7 +2369,9 @@
type = read_byte(get_SS(),buffer+1) & 0x1f;
removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
+#ifndef NO_PIO32
mode = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16;
+#endif
blksize = 2048;
write_byte(ebda_seg,&EbdaData->ata.devices[device].device, type);
diff -r a4196568095c -r b53a65034532 tools/ioemu/monitor.c
--- a/tools/ioemu/monitor.c Fri Jul 29 18:52:33 2005
+++ b/tools/ioemu/monitor.c Fri Jul 29 20:25:03 2005
@@ -514,21 +514,45 @@
str_allocated[nb_args] = str;
add_str:
if (nb_args >= MAX_ARGS) {
-#if 0
error_args:
-#endif
term_printf("%s: too many arguments\n", cmdname);
goto fail;
}
args[nb_args++] = str;
}
break;
+ case '-':
+ {
+ int has_option;
+ /* option */
+
+ c = *typestr++;
+ if (c == '\0')
+ goto bad_type;
+ while (isspace(*p))
+ p++;
+ has_option = 0;
+ if (*p == '-') {
+ p++;
+ if (*p != c) {
+ term_printf("%s: unsupported option -%c\n",
+ cmdname, *p);
+ goto fail;
+ }
+ p++;
+ has_option = 1;
+ }
+ if (nb_args >= MAX_ARGS)
+ goto error_args;
+ args[nb_args++] = (void *)has_option;
+ }
+ break;
/* TODO: add more commands we need here to support vmx device model */
case '/':
case 'i':
- case '-':
default:
- term_printf("%s: unknown type '%c', we only support quit command
now.\n", cmdname, c);
+ bad_type:
+ term_printf("%s: unknown type '%c',not support now.\n", cmdname,
c);
goto fail;
}
}
diff -r a4196568095c -r b53a65034532 tools/ioemu/target-i386-dm/qemu-dm.debug
--- a/tools/ioemu/target-i386-dm/qemu-dm.debug Fri Jul 29 18:52:33 2005
+++ b/tools/ioemu/target-i386-dm/qemu-dm.debug Fri Jul 29 20:25:03 2005
@@ -2,4 +2,4 @@
echo $* > /tmp/args
echo $DISPLAY >> /tmp/args
-exec /usr/bin/qemu-dm $*
+exec /usr/lib/xen/bin/qemu-dm $*
diff -r a4196568095c -r b53a65034532 tools/ioemu/vnc.c
--- a/tools/ioemu/vnc.c Fri Jul 29 18:52:33 2005
+++ b/tools/ioemu/vnc.c Fri Jul 29 20:25:03 2005
@@ -393,7 +393,28 @@
keycode>>=8;
}
} else if(down) {
- kbd_put_keysym(keySym);
+ int qemu_keysym = 0;
+ if (keySym <= 128) { /* normal ascii */
+ qemu_keysym = keySym;
+ } else {
+ switch(keySym) {
+ case XK_Up: qemu_keysym = QEMU_KEY_UP; break;
+ case XK_Down: qemu_keysym = QEMU_KEY_DOWN; break;
+ case XK_Left: qemu_keysym = QEMU_KEY_LEFT; break;
+ case XK_Right: qemu_keysym = QEMU_KEY_RIGHT; break;
+ case XK_Home: qemu_keysym = QEMU_KEY_HOME; break;
+ case XK_End: qemu_keysym = QEMU_KEY_END; break;
+ case XK_Page_Up: qemu_keysym = QEMU_KEY_PAGEUP; break;
+ case XK_Page_Down: qemu_keysym = QEMU_KEY_PAGEDOWN; break;
+ case XK_BackSpace: qemu_keysym = QEMU_KEY_BACKSPACE; break;
+ case XK_Delete: qemu_keysym = QEMU_KEY_DELETE; break;
+ case XK_Return:
+ case XK_Linefeed: qemu_keysym = keySym; break;
+ default: break;
+ }
+ }
+ if (qemu_keysym != 0)
+ kbd_put_keysym(qemu_keysym);
}
if(down) {
if(keySym==XK_Control_L)
diff -r a4196568095c -r b53a65034532 tools/libxc/Makefile
--- a/tools/libxc/Makefile Fri Jul 29 18:52:33 2005
+++ b/tools/libxc/Makefile Fri Jul 29 20:25:03 2005
@@ -19,18 +19,22 @@
SRCS += xc_domain.c
SRCS += xc_evtchn.c
SRCS += xc_gnttab.c
-SRCS += xc_load_aout9.c
SRCS += xc_load_bin.c
SRCS += xc_load_elf.c
SRCS += xc_linux_build.c
-SRCS += xc_linux_restore.c
-SRCS += xc_linux_save.c
SRCS += xc_misc.c
SRCS += xc_physdev.c
SRCS += xc_private.c
+ifeq ($(XEN_TARGET_ARCH),ia64)
+SRCS += xc_ia64_stubs.c
+else
+SRCS += xc_load_aout9.c
+SRCS += xc_linux_restore.c
+SRCS += xc_linux_save.c
+SRCS += xc_vmx_build.c
SRCS += xc_ptrace.c
SRCS += xc_ptrace_core.c
-SRCS += xc_vmx_build.c
+endif
CFLAGS += -Wall
CFLAGS += -Werror
diff -r a4196568095c -r b53a65034532 tools/libxc/xc.h
--- a/tools/libxc/xc.h Fri Jul 29 18:52:33 2005
+++ b/tools/libxc/xc.h Fri Jul 29 20:25:03 2005
@@ -27,6 +27,14 @@
#include <xen/sched_ctl.h>
#include <xen/acm.h>
+#ifdef __ia64__
+#define XC_PAGE_SHIFT 14
+#else
+#define XC_PAGE_SHIFT 12
+#endif
+#define XC_PAGE_SIZE (1UL << XC_PAGE_SHIFT)
+#define XC_PAGE_MASK (~(XC_PAGE_SIZE-1))
+
/*
* DEFINITIONS FOR CPU BARRIERS
*/
@@ -39,6 +47,11 @@
#define mb() __asm__ __volatile__ ( "mfence" : : : "memory")
#define rmb() __asm__ __volatile__ ( "lfence" : : : "memory")
#define wmb() __asm__ __volatile__ ( "" : : : "memory")
+#elif defined(__ia64__)
+/* FIXME */
+#define mb()
+#define rmb()
+#define wmb()
#else
#error "Define barriers"
#endif
@@ -462,6 +475,9 @@
int xc_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf,
unsigned long max_pfns);
+int xc_ia64_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf,
+ unsigned int start_page, unsigned int nr_pages);
+
/*\
* GRANT TABLE FUNCTIONS
\*/
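(Aside, not part of this changeset: with XC_PAGE_SHIFT at 14 on ia64 and 12 elsewhere, kilobyte counts convert to page counts without hard-coding 4KB pages, which is what the xc_domain.c and xc.c hunks below rely on. The variable mem_kb is assumed.)

    /* Illustrative only: pages needed for mem_kb kilobytes, using the
     * XC_PAGE_* definitions above (16KB pages on ia64, 4KB on x86). */
    unsigned int npages = mem_kb / (XC_PAGE_SIZE / 1024);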
diff -r a4196568095c -r b53a65034532 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Fri Jul 29 18:52:33 2005
+++ b/tools/libxc/xc_domain.c Fri Jul 29 20:25:03 2005
@@ -264,10 +264,11 @@
unsigned int mem_kb)
{
int err;
+ unsigned int npages = mem_kb / (PAGE_SIZE/1024);
err = do_dom_mem_op(xc_handle, MEMOP_increase_reservation, NULL,
- mem_kb / 4, 0, domid);
- if (err == mem_kb / 4)
+ npages, 0, domid);
+ if (err == npages)
return 0;
if (err > 0) {
diff -r a4196568095c -r b53a65034532 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Fri Jul 29 18:52:33 2005
+++ b/tools/libxc/xc_linux_build.c Fri Jul 29 20:25:03 2005
@@ -8,7 +8,7 @@
#define ELFSIZE 32
#endif
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(__ia64__)
#define ELFSIZE 64
#endif
@@ -34,6 +34,10 @@
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)
+
+#ifdef __ia64__
+#define probe_aout9(image,image_size,load_funcs) 1
+#endif
static int probeimageformat(char *image,
unsigned long image_size,
@@ -258,6 +262,67 @@
}
#endif
+#ifdef __ia64__
+#include <asm/fpu.h> /* for FPSR_DEFAULT */
+static int setup_guest(int xc_handle,
+ u32 dom,
+ char *image, unsigned long image_size,
+ gzFile initrd_gfd, unsigned long initrd_len,
+ unsigned long nr_pages,
+ unsigned long *pvsi, unsigned long *pvke,
+ unsigned long *pvss, vcpu_guest_context_t *ctxt,
+ const char *cmdline,
+ unsigned long shared_info_frame,
+ unsigned int control_evtchn,
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn, unsigned long *store_mfn)
+{
+ unsigned long *page_array = NULL;
+ struct load_funcs load_funcs;
+ struct domain_setup_info dsi;
+ unsigned long start_page;
+ int rc;
+
+ rc = probeimageformat(image, image_size, &load_funcs);
+ if ( rc != 0 )
+ goto error_out;
+
+ memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+ rc = (load_funcs.parseimage)(image, image_size, &dsi);
+ if ( rc != 0 )
+ goto error_out;
+
+ dsi.v_start = round_pgdown(dsi.v_start);
+ dsi.v_end = round_pgup(dsi.v_end);
+
+ start_page = dsi.v_start >> PAGE_SHIFT;
+ nr_pages = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT;
+ if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+ {
+ PERROR("Could not allocate memory");
+ goto error_out;
+ }
+
+ if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, nr_pages) != nr_pages )
+ {
+ PERROR("Could not get the page frame list");
+ goto error_out;
+ }
+
+ (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
+ &dsi);
+
+ *pvke = dsi.v_kernentry;
+ return 0;
+
+ error_out:
+ if ( page_array != NULL )
+ free(page_array);
+ return -1;
+}
+#else /* x86 */
static int setup_guest(int xc_handle,
u32 dom,
char *image, unsigned long image_size,
@@ -500,6 +565,8 @@
goto error_out;
#endif
+ *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
+
start_info = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
@@ -511,7 +578,7 @@
start_info->nr_pt_frames = nr_pt_pages;
start_info->mfn_list = vphysmap_start;
start_info->domain_controller_evtchn = control_evtchn;
- start_info->store_page = vstoreinfo_start;
+ start_info->store_mfn = *store_mfn;
start_info->store_evtchn = store_evtchn;
if ( initrd_len != 0 )
{
@@ -521,9 +588,6 @@
strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
munmap(start_info, PAGE_SIZE);
-
- /* Tell our caller where we told domain store page was. */
- *store_mfn = page_array[((vstoreinfo_start-dsi.v_start)>>PAGE_SHIFT)];
/* shared_info page starts its life empty. */
shared_info = xc_map_foreign_range(
@@ -558,6 +622,7 @@
free(page_array);
return -1;
}
+#endif
int xc_linux_build(int xc_handle,
u32 domid,
@@ -628,7 +693,11 @@
}
if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
+#ifdef __ia64__
+ 0 )
+#else
(ctxt->ctrlreg[3] != 0) )
+#endif
{
ERROR("Domain is already constructed");
goto error_out;
@@ -653,6 +722,18 @@
if ( image != NULL )
free(image);
+#ifdef __ia64__
+ /* based on new_thread in xen/arch/ia64/domain.c */
+ ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
+ ctxt->regs.cr_iip = vkern_entry;
+ ctxt->regs.cr_ifs = 1UL << 63;
+ ctxt->regs.ar_fpsr = FPSR_DEFAULT;
+ /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */
+ ctxt->vcpu.privregs = 0;
+ ctxt->shared.domain_controller_evtchn = control_evtchn;
+ ctxt->shared.flags = flags;
+ i = 0; /* silence unused variable warning */
+#else /* x86 */
/*
* Initial register values:
* DS,ES,FS,GS = FLAT_KERNEL_DS
@@ -707,6 +788,7 @@
ctxt->failsafe_callback_eip = 0;
ctxt->syscall_callback_eip = 0;
#endif
+#endif /* x86 */
memset( &launch_op, 0, sizeof(launch_op) );
diff -r a4196568095c -r b53a65034532 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Fri Jul 29 18:52:33 2005
+++ b/tools/libxc/xc_load_elf.c Fri Jul 29 20:25:03 2005
@@ -7,7 +7,7 @@
#if defined(__i386__)
#define ELFSIZE 32
#endif
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(__ia64__)
#define ELFSIZE 64
#endif
@@ -122,8 +122,12 @@
}
if ( guestinfo == NULL )
{
+#ifdef __ia64__
+ guestinfo = "";
+#else
ERROR("Not a Xen-ELF image: '__xen_guest' section not found.");
return -EINVAL;
+#endif
}
for ( h = 0; h < ehdr->e_phnum; h++ )
diff -r a4196568095c -r b53a65034532 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Fri Jul 29 18:52:33 2005
+++ b/tools/libxc/xc_private.c Fri Jul 29 20:25:03 2005
@@ -256,6 +256,37 @@
return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
}
+#ifdef __ia64__
+int xc_ia64_get_pfn_list(int xc_handle,
+ u32 domid,
+ unsigned long *pfn_buf,
+ unsigned int start_page,
+ unsigned int nr_pages)
+{
+ dom0_op_t op;
+ int ret;
+
+ op.cmd = DOM0_GETMEMLIST;
+ op.u.getmemlist.domain = (domid_t)domid;
+ op.u.getmemlist.max_pfns = ((unsigned long)start_page << 32) | nr_pages;
+ op.u.getmemlist.buffer = pfn_buf;
+
+ if ( mlock(pfn_buf, nr_pages * sizeof(unsigned long)) != 0 )
+ {
+ PERROR("Could not lock pfn list buffer");
+ return -1;
+ }
+
+ /* XXX Hack to put pages in TLB, hypervisor should be able to handle this */
+ memset(pfn_buf, 0, nr_pages * sizeof(unsigned long));
+ ret = do_dom0_op(xc_handle, &op);
+
+ (void)munlock(pfn_buf, nr_pages * sizeof(unsigned long));
+
+ return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
+}
+#endif
+
long xc_get_tot_pages(int xc_handle, u32 domid)
{
dom0_op_t op;
diff -r a4196568095c -r b53a65034532 tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h Fri Jul 29 18:52:33 2005
+++ b/tools/libxc/xc_private.h Fri Jul 29 20:25:03 2005
@@ -55,7 +55,7 @@
#define L4_PAGETABLE_ENTRIES 512
#endif
-#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define PAGE_SHIFT XC_PAGE_SHIFT
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff -r a4196568095c -r b53a65034532 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c Fri Jul 29 18:52:33 2005
+++ b/tools/libxc/xc_ptrace.c Fri Jul 29 20:25:03 2005
@@ -202,7 +202,7 @@
}
if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
goto error_out;
- if (ctxt[cpu].flags & VGCF_VMX_GUEST)
+ if (ctxt[cpu].flags & VGCF_VMX_GUEST && paging_enabled(&ctxt[cpu]))
page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
if (page != page_phys[cpu] || perm != prev_perm[cpu])
{
diff -r a4196568095c -r b53a65034532 tools/misc/Makefile
--- a/tools/misc/Makefile Fri Jul 29 18:52:33 2005
+++ b/tools/misc/Makefile Fri Jul 29 20:25:03 2005
@@ -22,7 +22,9 @@
build: $(TARGETS)
$(MAKE) -C miniterm
$(MAKE) -C cpuperf
+ifneq ($(XEN_TARGET_ARCH),ia64)
$(MAKE) -C mbootpack
+endif
$(MAKE) -C lomount
install: build
diff -r a4196568095c -r b53a65034532 tools/misc/policyprocessor/XmlToBinInterface.java
--- a/tools/misc/policyprocessor/XmlToBinInterface.java Fri Jul 29 18:52:33 2005
+++ b/tools/misc/policyprocessor/XmlToBinInterface.java Fri Jul 29 20:25:03 2005
@@ -123,7 +123,7 @@
final short binaryBufferHeaderSz = (3 * u32Size + 4* u16Size);
/* copied directlty from policy_ops.h */
- final int POLICY_INTERFACE_VERSION = 0xAAAA0002;
+ final int POLICY_INTERFACE_VERSION = 0xAAAA0003;
/* copied directly from acm.h */
final int ACM_MAGIC = 0x0001debc;
diff -r a4196568095c -r b53a65034532 tools/python/setup.py
--- a/tools/python/setup.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/setup.py Fri Jul 29 20:25:03 2005
@@ -51,6 +51,7 @@
'xen.xend.xenstore',
'xen.xm',
'xen.web',
+ 'xen.sv'
],
ext_package = "xen.lowlevel",
ext_modules = [ xc, xu, xs ]
diff -r a4196568095c -r b53a65034532 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Jul 29 20:25:03 2005
@@ -242,7 +242,7 @@
"paused", info[i].paused,
"blocked", info[i].blocked,
"running", info[i].running,
- "mem_kb", info[i].nr_pages*4,
+ "mem_kb",
info[i].nr_pages*(XC_PAGE_SIZE/1024),
"cpu_time", info[i].cpu_time,
"maxmem_kb", info[i].max_memkb,
"ssidref", info[i].ssidref,
@@ -813,6 +813,22 @@
return zero;
}
+static PyObject *pyxc_init_store(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ XcObject *xc = (XcObject *)self;
+
+ int remote_port;
+
+ static char *kwd_list[] = { "remote_port", NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list,
+ &remote_port) )
+ return NULL;
+
+ return PyInt_FromLong(xc_init_store(xc->xc_handle, remote_port));
+}
+
static PyMethodDef pyxc_methods[] = {
{ "handle",
@@ -1088,6 +1104,13 @@
" mem_kb [long]: .\n"
"Returns: [int] 0 on success; -1 on error.\n" },
+ { "init_store",
+ (PyCFunction)pyxc_init_store,
+ METH_VARARGS | METH_KEYWORDS, "\n"
+ "Initialize the store event channel and return the store page mfn.\n"
+ " remote_port [int]: store event channel port number.\n"
+ "Returns: [int] mfn on success; <0 on error.\n" },
+
{ NULL, NULL, 0, NULL }
};
diff -r a4196568095c -r b53a65034532 tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/lowlevel/xs/xs.c Fri Jul 29 20:25:03 2005
@@ -367,7 +367,7 @@
if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
&path, &priority, &token))
goto exit;
- xsval = xs_watch(xh, path, token, priority);
+ xsval = xs_watch(xh, path, token);
val = pyvalue_int(xsval);
exit:
return val;
diff -r a4196568095c -r b53a65034532 tools/python/xen/lowlevel/xu/xu.c
--- a/tools/python/xen/lowlevel/xu/xu.c Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/lowlevel/xu/xu.c Fri Jul 29 20:25:03 2005
@@ -45,9 +45,6 @@
#define EVTCHN_BIND _IO('E', 2)
/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */
#define EVTCHN_UNBIND _IO('E', 3)
-
-/* Size of a machine page frame. */
-#define PAGE_SIZE 4096
/* Set the close-on-exec flag on a file descriptor. Doesn't currently bother
* to check for errors. */
diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py Fri Jul 29 20:25:03 2005
@@ -17,7 +17,6 @@
from XendLogging import log
SIGNATURE = "LinuxGuestRecord"
-PAGE_SIZE = 4096
PATH_XC_SAVE = "/usr/libexec/xen/xc_save"
PATH_XC_RESTORE = "/usr/libexec/xen/xc_restore"
diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xend/XendDomain.py Fri Jul 29 20:25:03 2005
@@ -112,6 +112,11 @@
else:
self._delete_domain(domid)
self.refresh(cleanup=True)
+
+ dom0 = self.domain_lookup(0)
+ if not dom0:
+ dom0 = self.domain_unknown(0)
+ dom0.dom0_init_store()
def close(self):
pass
@@ -218,10 +223,6 @@
if cleanup:
self.reap()
doms = self.xen_domains()
- # Add entries for any domains we don't know about.
- for id in doms.keys():
- if id not in self.domains:
- self.domain_lookup(id)
# Remove entries for domains that no longer exist.
# Update entries for existing domains.
do_domain_restarts = False
@@ -331,22 +332,25 @@
self.update_domain(id)
return self.domains.get(id)
+ def domain_unknown(self, id):
+ try:
+ info = self.xen_domain(id)
+ if info:
+ uuid = getUuid()
+ log.info(
+ "Creating entry for unknown domain: id=%d uuid=%s",
+ id, uuid)
+ db = self.dbmap.addChild(uuid)
+ dominfo = XendDomainInfo.recreate(db, info)
+ dominfo.setdom(id)
+ self._add_domain(dominfo)
+ return dominfo
+ except Exception, ex:
+ log.exception("Error creating domain info: id=%d", id)
+ return None
+
def domain_lookup(self, id):
- dominfo = self.domains.get(id)
- if not dominfo:
- try:
- info = self.xen_domain(id)
- if info:
- uuid = getUuid()
- log.info(
- "Creating entry for unknown domain: id=%d uuid=%s",
- id, uuid)
- db = self.dbmap.addChild(uuid)
- dominfo = XendDomainInfo.recreate(db, info)
- self._add_domain(dominfo)
- except Exception, ex:
- log.exception("Error creating domain info: id=%d", id)
- return dominfo
+ return self.domains.get(id)
def domain_lookup_by_name(self, name):
dominfo = self.domains.get_by_name(name)
diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py Fri Jul 29 20:25:03 2005
@@ -31,12 +31,6 @@
from xen.xend.uuid import getUuid
from xen.xend.xenstore import DBVar
-
-"""Flag for a block device backend domain."""
-SIF_BLK_BE_DOMAIN = (1<<4)
-
-"""Flag for a net device backend domain."""
-SIF_NET_BE_DOMAIN = (1<<5)
"""Shutdown code for poweroff."""
DOMAIN_POWEROFF = 0
@@ -170,13 +164,13 @@
"""
dom = info['dom']
vm = cls(db)
+ vm.setdom(dom)
db.readDB()
vm.importFromDB()
config = vm.config
log.debug('info=' + str(info))
log.debug('config=' + prettyprintstring(config))
- vm.setdom(dom)
vm.memory = info['mem_kb']/1024
if config:
@@ -289,6 +283,7 @@
def importFromDB(self):
self.db.importFromDB(self, fields=self.__exports__)
+ self.store_channel = self.eventChannel("store_channel")
def setdom(self, dom):
"""Set the domain id.
@@ -989,6 +984,15 @@
return 0
return timeout - (time.time() - self.shutdown_pending['start'])
+ def dom0_init_store(self):
+ if not self.store_channel:
+ self.store_channel = self.eventChannel("store_channel")
+ self.store_mfn = xc.init_store(self.store_channel.port2)
+ if self.store_mfn >= 0:
+ self.db.introduceDomain(self.id, self.store_mfn,
+ self.store_channel)
+ self.exportToDB(save=True, sync=True)
+
def vm_field_ignore(vm, config, val, index):
"""Dummy config field handler used for fields with built-in handling.
diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/XendRoot.py
--- a/tools/python/xen/xend/XendRoot.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xend/XendRoot.py Fri Jul 29 20:25:03 2005
@@ -75,6 +75,10 @@
"""Default port xend serves consoles at. """
console_port_base_default = '9600'
+
+ dom0_min_mem_default = '0'
+
+ dom0_cpus_default = '0'
components = {}
@@ -329,6 +333,12 @@
def get_vif_antispoof(self):
return self.get_config_bool('vif-antispoof', 'yes')
+ def get_dom0_min_mem(self):
+ return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default)
+
+ def get_dom0_cpus(self):
+ return self.get_config_int('dom0-cpus', self.dom0_cpus_default)
+
def instance():
"""Get an instance of XendRoot.
Use this instead of the constructor.
diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xend/image.py Fri Jul 29 20:25:03 2005
@@ -7,6 +7,12 @@
from xen.xend.xenstore import DBVar
from xen.xend.server import channel
+
+"""Flag for a block device backend domain."""
+SIF_BLK_BE_DOMAIN = (1<<4)
+
+"""Flag for a net device backend domain."""
+SIF_NET_BE_DOMAIN = (1<<5)
class ImageHandler:
"""Abstract base class for image handlers.
@@ -130,7 +136,13 @@
# xc.domain_setuuid(dom, uuid)
xc.domain_setcpuweight(dom, cpu_weight)
xc.domain_setmaxmem(dom, mem_kb)
- xc.domain_memory_increase_reservation(dom, mem_kb)
+
+ try:
+ xc.domain_memory_increase_reservation(dom, mem_kb)
+ except:
+ xc.domain_destroy(dom)
+ raise
+
if cpu != -1:
xc.domain_pincpu(dom, 0, 1<<int(cpu))
return dom
@@ -284,18 +296,19 @@
ret.append("%s" % v)
# Handle hd img related options
- device = sxp.child(self.vm.config, 'device')
- vbdinfo = sxp.child(device, 'vbd')
- if not vbdinfo:
- raise VmError("vmx: missing vbd configuration")
- uname = sxp.child_value(vbdinfo, 'uname')
- vbddev = sxp.child_value(vbdinfo, 'dev')
- (vbdtype, vbdparam) = string.split(uname, ':', 1)
- vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
- if vbdtype != 'file' or vbddev not in vbddev_list:
- raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
- ret.append("-%s" % vbddev)
- ret.append("%s" % vbdparam)
+ devices = sxp.children(self.vm.config, 'device')
+ for device in devices:
+ vbdinfo = sxp.child(device, 'vbd')
+ if not vbdinfo:
+ raise VmError("vmx: missing vbd configuration")
+ uname = sxp.child_value(vbdinfo, 'uname')
+ vbddev = sxp.child_value(vbdinfo, 'dev')
+ (vbdtype, vbdparam) = string.split(uname, ':', 1)
+ vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
+ if vbdtype != 'file' or vbddev not in vbddev_list:
+ raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
+ ret.append("-%s" % vbddev)
+ ret.append("%s" % vbdparam)
# Handle graphics library related options
vnc = sxp.child_value(self.vm.config, 'vnc')
@@ -352,8 +365,9 @@
def destroy(self):
channel.eventChannelClose(self.device_channel)
- os.system("kill -KILL"
- + " %d" % self.pid)
+ import signal
+ os.kill(self.pid, signal.SIGKILL)
+ (pid, status) = os.waitpid(self.pid, 0)
def getDomainMemory(self, mem_mb):
return (mem_mb * 1024) + self.getPageTableSize(mem_mb)
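
The destroy() change above replaces the shelled-out "kill -KILL <pid>" with os.kill() followed by os.waitpid(), so the device-model child is terminated without spawning a shell and is reaped rather than left as a zombie. A minimal sketch of the same pattern, assuming pid names a child of the calling process:

import os
import signal

def kill_and_reap(pid):
    """Forcibly stop a child process and collect its exit status."""
    os.kill(pid, signal.SIGKILL)      # send SIGKILL directly, no shell involved
    pid, status = os.waitpid(pid, 0)  # reap the child so no zombie remains
    return status
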
diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/server/SrvDaemon.py
--- a/tools/python/xen/xend/server/SrvDaemon.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xend/server/SrvDaemon.py Fri Jul 29 20:25:03 2005
@@ -5,7 +5,6 @@
###########################################################
import os
-import os.path
import signal
import sys
import threading
@@ -16,6 +15,7 @@
import StringIO
import traceback
import time
+import glob
from xen.lowlevel import xu
@@ -25,6 +25,7 @@
from xen.xend.XendError import XendError
from xen.xend.server import SrvServer
from xen.xend.XendLogging import log
+from xen.xend import XendRoot; xroot = XendRoot.instance()
import channel
import controller
@@ -184,9 +185,13 @@
log.info("Started xenstored, pid=%d", pid)
else:
# Child
- if XEND_DAEMONIZE and (not XENSTORED_DEBUG):
+ if XEND_DAEMONIZE:
self.daemonize()
- os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork")
+ if XENSTORED_DEBUG:
+ os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork",
+ "-T", "/var/log/xenstored-trace.log")
+ else:
+ os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork")
def daemonize(self):
if not XEND_DAEMONIZE: return
@@ -323,6 +328,7 @@
return self.cleanup(kill=True)
def run(self):
+ _enforce_dom0_cpus()
try:
log.info("Xend Daemon started")
self.createFactories()
@@ -359,6 +365,32 @@
#sys.exit(rc)
os._exit(rc)
+def _enforce_dom0_cpus():
+ dn = xroot.get_dom0_cpus()
+
+ for d in glob.glob("/sys/devices/system/cpu/cpu*"):
+ cpu = int(os.path.basename(d)[3:])
+ if (dn == 0) or (cpu < dn):
+ v = "1"
+ else:
+ v = "0"
+ try:
+ f = open("%s/online" %d, "r+")
+ c = f.read(1)
+ if (c != v):
+ if v == "0":
+ log.info("dom0 is trying to give back cpu %d", cpu)
+ else:
+ log.info("dom0 is trying to take cpu %d", cpu)
+ f.seek(0)
+ f.write(v)
+ f.close()
+ log.info("dom0 successfully enforced cpu %d", cpu)
+ else:
+ f.close()
+ except:
+ pass
+
def instance():
global inst
try:
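
_enforce_dom0_cpus() above drives the Linux CPU hotplug interface: writing "1" to /sys/devices/system/cpu/cpuN/online brings a CPU online for dom0 and writing "0" takes it away, with a dom0-cpus value of 0 meaning all CPUs stay online. A standalone sketch of the same loop, assuming that sysfs layout is mounted (cpu0 usually has no online file and is simply skipped):

import glob
import os

def enforce_dom0_cpus(limit):
    """Keep cpus 0..limit-1 online and offline the rest; limit == 0 keeps them all."""
    for d in glob.glob("/sys/devices/system/cpu/cpu[0-9]*"):
        cpu = int(os.path.basename(d)[3:])
        if limit == 0 or cpu < limit:
            want = "1"
        else:
            want = "0"
        try:
            f = open("%s/online" % d, "r+")
            if f.read(1) != want:
                f.seek(0)
                f.write(want)     # the kernel onlines/offlines the CPU on write
            f.close()
        except IOError:
            pass                  # no online file (e.g. cpu0) or insufficient permission
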
diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/server/channel.py
--- a/tools/python/xen/xend/server/channel.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xend/server/channel.py Fri Jul 29 20:25:03 2005
@@ -42,16 +42,16 @@
@param port2
"""
try:
- dom1 = int(db['dom1'])
+ dom1 = int(db['dom1'].getData())
except: pass
try:
- dom2 = int(db['dom2'])
+ dom2 = int(db['dom2'].getData())
except: pass
try:
- port1 = int(db['port1'])
+ port1 = int(db['port1'].getData())
except: pass
try:
- port2 = int(db['port2'])
+ port2 = int(db['port2'].getData())
except: pass
evtchn = cls.interdomain(dom1, dom2, port1=port1, port2=port2)
return evtchn
diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/xenstore/xsnode.py
--- a/tools/python/xen/xend/xenstore/xsnode.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xend/xenstore/xsnode.py Fri Jul 29 20:25:03 2005
@@ -350,7 +350,13 @@
self.watchThread = None
def introduceDomain(self, dom, page, evtchn, path):
- self.getxs().introduce_domain(dom, page, evtchn.port1, path)
+ try:
+ self.getxs().introduce_domain(dom, page, evtchn.port1, path)
+ except RuntimeError, ex:
+ if ex.args[0] == errno.EISCONN:
+ return None
+ else:
+ raise
def releaseDomain(self, dom):
self.getxs().release_domain(dom)
diff -r a4196568095c -r b53a65034532 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xm/create.py Fri Jul 29 20:25:03 2005
@@ -1,4 +1,5 @@
# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 Nguyen Anh Quynh <aquynh@xxxxxxxxx>
"""Domain creation.
"""
@@ -7,10 +8,13 @@
import sys
import socket
+import xen.lowlevel.xc
+
from xen.xend import sxp
from xen.xend import PrettyPrint
from xen.xend.XendClient import server, XendError
from xen.xend.XendBootloader import bootloader
+from xen.xend import XendRoot; xroot = XendRoot.instance()
from xen.util import blkif
from xen.util import console_client
@@ -644,6 +648,36 @@
% (dom, console_port))
return (dom, console_port)
+def get_dom0_alloc():
+ """Return current allocation memory of dom0 (in MB). Return 0 on error"""
+ PROC_XEN_BALLOON = "/proc/xen/balloon"
+
+ f = open(PROC_XEN_BALLOON, "r")
+ line = f.readline()
+ for x in line.split():
+ for n in x:
+ if not n.isdigit():
+ break
+ else:
+ f.close()
+ return int(x)/1024
+ f.close()
+ return 0
+
+def balloon_out(dom0_min_mem, opts):
+ """Balloon out to get memory for domU, if necessarily"""
+ SLACK = 4
+
+ xc = xen.lowlevel.xc.new()
+ pinfo = xc.physinfo()
+ free_mem = pinfo['free_pages']/256
+ if free_mem < opts.vals.memory + SLACK:
+ need_mem = opts.vals.memory + SLACK - free_mem
+ cur_alloc = get_dom0_alloc()
+ if cur_alloc - need_mem >= dom0_min_mem:
+ server.xend_domain_mem_target_set(0, cur_alloc - need_mem)
+ del xc
+
def main(argv):
opts = gopts
args = opts.parse(argv)
@@ -671,6 +705,10 @@
if opts.vals.dryrun:
PrettyPrint.prettyprint(config)
else:
+ dom0_min_mem = xroot.get_dom0_min_mem()
+ if dom0_min_mem != 0:
+ balloon_out(dom0_min_mem, opts)
+
(dom, console) = make_domain(opts, config)
if opts.vals.console_autoconnect:
path = "/var/lib/xend/console-%s" % console
diff -r a4196568095c -r b53a65034532 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/xm/main.py Fri Jul 29 20:25:03 2005
@@ -11,6 +11,13 @@
from xen.xend import PrettyPrint
from xen.xend import sxp
+# this is a nasty place to stick this in, but required because
+# log file access is set up via a 5 deep import chain. This
+# ensures the user sees a useful message instead of a stack trace
+if os.getuid() != 0:
+ print "xm requires root access to execute, please try again as root"
+ sys.exit(1)
+
from xen.xend.XendClient import XendError, server
from xen.xend.XendClient import main as xend_client_main
from xen.xm import create, destroy, migrate, shutdown, sysrq
@@ -390,7 +397,7 @@
d['dom'] = int(sxp.child_value(info, 'id', '-1'))
d['name'] = sxp.child_value(info, 'name', '??')
d['mem'] = int(sxp.child_value(info, 'memory', '0'))
- d['cpu'] = int(sxp.child_value(info, 'cpu', '0'))
+ d['cpu'] = str(sxp.child_value(info, 'cpu', '0'))
d['vcpus'] = int(sxp.child_value(info, 'vcpus', '0'))
d['state'] = sxp.child_value(info, 'state', '??')
d['cpu_time'] = float(sxp.child_value(info, 'cpu_time', '0'))
@@ -399,12 +406,14 @@
d['port'] = sxp.child_value(console, 'console_port')
else:
d['port'] = ''
+ if d['vcpus'] > 1:
+ d['cpu'] = '-'
if ((int(sxp.child_value(info, 'ssidref', '0'))) != 0):
             d['ssidref1'] = int(sxp.child_value(info, 'ssidref', '0')) & 0xffff
             d['ssidref2'] = (int(sxp.child_value(info, 'ssidref', '0')) >> 16) & 0xffff
-            print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3d %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s s:%(ssidref2)02x/p:%(ssidref1)02x" % d)
+            print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3s %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s s:%(ssidref2)02x/p:%(ssidref1)02x" % d)
else:
-            print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3d %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s" % d)
+            print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3s %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s" % d)
def show_vcpus(self, doms):
print 'Name Id VCPU CPU CPUMAP'
@@ -554,9 +563,9 @@
xm.prog(ProgMaxmem)
-class ProgBalloon(Prog):
- group = 'domain'
- name = 'balloon'
+class ProgSetMem(Prog):
+ group = 'domain'
+ name = 'set-mem'
info = """Set the domain's memory footprint using the balloon driver."""
def help(self, args):
@@ -570,7 +579,7 @@
mem_target = int_unit(args[2], 'm')
server.xend_domain_mem_target_set(dom, mem_target)
-xm.prog(ProgBalloon)
+xm.prog(ProgSetMem)
class ProgVcpuhotplug(Prog):
group = 'domain'
diff -r a4196568095c -r b53a65034532 tools/xcs/xcs.h
--- a/tools/xcs/xcs.h Fri Jul 29 18:52:33 2005
+++ b/tools/xcs/xcs.h Fri Jul 29 20:25:03 2005
@@ -37,7 +37,7 @@
/* ------[ Other required defines ]----------------------------------------*/
/* Size of a machine page frame. */
-#define PAGE_SIZE 4096
+#define PAGE_SIZE XC_PAGE_SIZE
#ifndef timersub /* XOPEN and __BSD don't cooperate well... */
#define timersub(a, b, result) \
diff -r a4196568095c -r b53a65034532 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/Makefile Fri Jul 29 20:25:03 2005
@@ -86,9 +86,9 @@
$(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED)
stresstest: xs_stress xs_watch_stress xenstored_test
- rm -rf $(TESTDIR)/store
+ rm -rf $(TESTDIR)/store $(TESTDIR)/transactions
 	export $(TESTENV); PID=`./xenstored_test --output-pid --trace-file=/tmp/trace`; ./xs_stress 5000; ret=$$?; kill $$PID; exit $$ret
- rm -rf $(TESTDIR)/store
+ rm -rf $(TESTDIR)/store $(TESTDIR)/transactions
 	export $(TESTENV); PID=`./xenstored_test --output-pid`; ./xs_watch_stress; ret=$$?; kill $$PID; exit $$ret
xs_dom0_test: xs_dom0_test.o utils.o
diff -r a4196568095c -r b53a65034532 tools/xenstore/TODO
--- a/tools/xenstore/TODO Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/TODO Fri Jul 29 20:25:03 2005
@@ -2,8 +2,9 @@
are omissions of important but necessary things. It is up to the
reader to fill in the blanks.
-- Remove calls to system() from daemon
- Timeout failed watch responses
-- Dynamic nodes
+- Dynamic/supply nodes
- Persistant storage of introductions, watches and transactions, so daemon can
restart
- Remove assumption that rename doesn't fail
+- Multi-root transactions, for setting up front and back ends at the same time.
+
diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/02directory.sh
--- a/tools/xenstore/testsuite/02directory.sh Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/testsuite/02directory.sh Fri Jul 29 20:25:03 2005
@@ -1,22 +1,23 @@
#! /bin/sh
-# Root directory has nothing in it.
-[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "" ]
+# Root directory has only tool dir in it.
+[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "tool" ]
# Create a file.
[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ]
# Directory shows it.
-[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "test" ]
+[ "`echo -e 'dir /' | ./xs_test 2>&1 | sort`" = "test
+tool" ]
# Make a new directory.
[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
# Check it's there.
-DIR="`echo -e 'dir /' | ./xs_test 2>&1`"
-[ "$DIR" = "test
-dir" ] || [ "$DIR" = "dir
-test" ]
+DIR="`echo -e 'dir /' | ./xs_test 2>&1 | sort`"
+[ "$DIR" = "dir
+test
+tool" ]
# Check it's empty.
[ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "" ]
diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/07watch.sh
--- a/tools/xenstore/testsuite/07watch.sh Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/testsuite/07watch.sh Fri Jul 29 20:25:03 2005
@@ -3,45 +3,52 @@
# Watch something, write to it, check watch has fired.
[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ]
-[ "`echo -e '1 watch /test token 100
-2 write /test create contents2
+[ "`echo -e '1 watch /test token
+2 async write /test create contents2
1 waitwatch
1 ackwatch token' | ./xs_test 2>&1`" = "1:/test:token" ]
# Check that reads don't set it off.
-[ "`echo -e '1 watch /test token 100
+[ "`echo -e '1 watch /test token
2 read /test
1 waitwatch' | ./xs_test 2>&1`" = "2:contents2
1:waitwatch timeout" ]
# mkdir, setperm and rm should (also tests watching dirs)
[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
-[ "`echo -e '1 watch /dir token 100
-2 mkdir /dir/newdir
+[ "`echo -e '1 watch /dir token
+2 async mkdir /dir/newdir
1 waitwatch
1 ackwatch token
-2 setperm /dir/newdir 0 READ
+asyncwait
+2 async setperm /dir/newdir 0 READ
1 waitwatch
1 ackwatch token
-2 rm /dir/newdir
+asyncwait
+2 async rm /dir/newdir
1 waitwatch
1 ackwatch token' | ./xs_test 2>&1`" = "1:/dir/newdir:token
1:/dir/newdir:token
1:/dir/newdir:token" ]
+# We don't get a watch from our own commands.
+[ "`echo -e 'watch /dir token
+mkdir /dir/newdir
+waitwatch' | ./xs_test 2>&1`" = "waitwatch timeout" ]
+
# ignore watches while doing commands, should work.
-[ "`echo -e 'watch /dir token 100
-write /dir/test create contents
+[ "`echo -e 'watch /dir token
+1 async write /dir/test create contents
read /dir/test
waitwatch
ackwatch token' | ./xs_test 2>&1`" = "contents
/dir/test:token" ]
-# watch priority /test.
-[ "`echo -e '1 watch /dir token1 1
-3 watch /dir token3 3
-2 watch /dir token2 2
-write /dir/test create contents
+# watch priority test: all simultaneous
+[ "`echo -e '1 watch /dir token1
+3 watch /dir token3
+2 watch /dir token2
+async write /dir/test create contents
3 waitwatch
3 ackwatch token3
2 waitwatch
@@ -52,9 +59,9 @@
1:/dir/test:token1" ]
# If one dies (without acking), the other should still get ack.
-[ "`echo -e '1 watch /dir token1 0
-2 watch /dir token2 1
-write /dir/test create contents
+[ "`echo -e '1 watch /dir token1
+2 watch /dir token2
+async write /dir/test create contents
2 waitwatch
2 close
1 waitwatch
@@ -62,51 +69,52 @@
1:/dir/test:token1" ]
# If one dies (without reading at all), the other should still get ack.
-[ "`echo -e '1 watch /dir token1 0
-2 watch /dir token2 1
-write /dir/test create contents
+[ "`echo -e '1 watch /dir token1
+2 watch /dir token2
+async write /dir/test create contents
2 close
1 waitwatch
1 ackwatch token1' | ./xs_test 2>&1`" = "1:/dir/test:token1" ]
# unwatch
-[ "`echo -e '1 watch /dir token1 0
+[ "`echo -e '1 watch /dir token1
1 unwatch /dir token1
-1 watch /dir token2 0
-2 write /dir/test2 create contents
+1 watch /dir token2
+2 async write /dir/test2 create contents
1 waitwatch
1 unwatch /dir token2' | ./xs_test 2>&1`" = "1:/dir/test2:token2" ]
# unwatch while watch pending. Next watcher gets the event.
-[ "`echo -e '1 watch /dir token1 0
-2 watch /dir token2 1
-write /dir/test create contents
+[ "`echo -e '1 watch /dir token1
+2 watch /dir token2
+async write /dir/test create contents
2 unwatch /dir token2
1 waitwatch
1 ackwatch token1' | ./xs_test 2>&1`" = "1:/dir/test:token1" ]
# unwatch while watch pending. Should clear this so we get next event.
-[ "`echo -e '1 watch /dir token1 0
-write /dir/test create contents
+[ "`echo -e '1 watch /dir token1
+async write /dir/test create contents
1 unwatch /dir token1
-1 watch /dir/test token2 0
-write /dir/test none contents2
+1 watch /dir/test token2
+asyncwait
+async write /dir/test none contents2
1 waitwatch
1 ackwatch token2' | ./xs_test 2>&1`" = "1:/dir/test:token2" ]
# check we only get notified once.
-[ "`echo -e '1 watch /test token 100
-2 write /test create contents2
+[ "`echo -e '1 watch /test token
+2 async write /test create contents2
1 waitwatch
1 ackwatch token
1 waitwatch' | ./xs_test 2>&1`" = "1:/test:token
1:waitwatch timeout" ]
# watches are queued in order.
-[ "`echo -e '1 watch / token 100
-2 write /test1 create contents
-2 write /test2 create contents
-2 write /test3 create contents
+[ "`echo -e '1 watch / token
+async 2 write /test1 create contents
+async 2 write /test2 create contents
+async 2 write /test3 create contents
1 waitwatch
1 ackwatch token
1 waitwatch
@@ -117,9 +125,9 @@
1:/test3:token" ]
# Creation of subpaths should be covered correctly.
-[ "`echo -e '1 watch / token 100
-2 write /test/subnode create contents2
-2 write /test/subnode/subnode create contents2
+[ "`echo -e '1 watch / token
+2 async write /test/subnode create contents2
+2 async write /test/subnode/subnode create contents2
1 waitwatch
1 ackwatch token
1 waitwatch
@@ -129,23 +137,23 @@
1:waitwatch timeout" ]
# Watch event must have happened before we registered interest.
-[ "`echo -e '1 watch / token 100
-2 write /test/subnode create contents2
-2 watch / token2 0
+[ "`echo -e '1 watch / token
+2 async write /test/subnode create contents2
+1 watch / token2 0
1 waitwatch
1 ackwatch token
-2 waitwatch' | ./xs_test 2>&1`" = "1:/test/subnode:token
-2:waitwatch timeout" ]
+1 waitwatch' | ./xs_test 2>&1`" = "1:/test/subnode:token
+1:waitwatch timeout" ]
# Rm fires notification on child.
-[ "`echo -e '1 watch /test/subnode token 100
-2 rm /test
+[ "`echo -e '1 watch /test/subnode token
+2 async rm /test
1 waitwatch
1 ackwatch token' | ./xs_test 2>&1`" = "1:/test/subnode:token" ]
 # Watch should not double-send after we ack, even if we did something in between.
-[ "`echo -e '1 watch /test2 token 100
-2 write /test2/foo create contents2
+[ "`echo -e '1 watch /test2 token
+2 async write /test2/foo create contents2
1 waitwatch
1 read /test2/foo
1 ackwatch token
diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/08transaction.sh
--- a/tools/xenstore/testsuite/08transaction.sh Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/testsuite/08transaction.sh Fri Jul 29 20:25:03 2005
@@ -1,79 +1,93 @@
#! /bin/sh
# Test transactions.
+echo mkdir /test | ./xs_test
+
# Simple transaction: create a file inside transaction.
-[ "`echo -e '1 start /
-1 write /entry1 create contents
-2 dir /
-1 dir /
+[ "`echo -e '1 start /test
+1 write /test/entry1 create contents
+2 dir /test
+1 dir /test
1 commit
-2 read /entry1' | ./xs_test`" = "1:entry1
+2 read /test/entry1' | ./xs_test`" = "1:entry1
2:contents" ]
-echo rm /entry1 | ./xs_test
+echo rm /test/entry1 | ./xs_test
# Create a file and abort transaction.
-[ "`echo -e '1 start /
-1 write /entry1 create contents
-2 dir /
-1 dir /
+[ "`echo -e '1 start /test
+1 write /test/entry1 create contents
+2 dir /test
+1 dir /test
1 abort
-2 dir /' | ./xs_test`" = "1:entry1" ]
+2 dir /test' | ./xs_test`" = "1:entry1" ]
-echo write /entry1 create contents | ./xs_test
+echo write /test/entry1 create contents | ./xs_test
# Delete in transaction, commit
-[ "`echo -e '1 start /
-1 rm /entry1
-2 dir /
-1 dir /
+[ "`echo -e '1 start /test
+1 rm /test/entry1
+2 dir /test
+1 dir /test
1 commit
-2 dir /' | ./xs_test`" = "2:entry1" ]
+2 dir /test' | ./xs_test`" = "2:entry1" ]
# Delete in transaction, abort.
-echo write /entry1 create contents | ./xs_test
-[ "`echo -e '1 start /
-1 rm /entry1
-2 dir /
-1 dir /
+echo write /test/entry1 create contents | ./xs_test
+[ "`echo -e '1 start /test
+1 rm /test/entry1
+2 dir /test
+1 dir /test
1 abort
-2 dir /' | ./xs_test`" = "2:entry1
+2 dir /test' | ./xs_test`" = "2:entry1
2:entry1" ]
# Transactions can take as long as the want...
-[ "`echo -e 'start /
+[ "`echo -e 'start /test
sleep 1
-rm /entry1
+rm /test/entry1
commit
-dir /' | ./xs_test`" = "" ]
+dir /test' | ./xs_test --no-timeout`" = "" ]
# ... as long as noone is waiting.
-[ "`echo -e '1 start /
-2 mkdir /dir
-1 mkdir /dir
-1 dir /
-1 commit' | ./xs_test 2>&1`" = "1:dir
+[ "`echo -e '1 start /test
+2 mkdir /test/dir
+1 mkdir /test/dir
+1 dir /test
+1 commit' | ./xs_test --no-timeout 2>&1`" = "1:dir
FATAL: 1: commit: Connection timed out" ]
# Events inside transactions don't trigger watches until (successful) commit.
-[ "`echo -e '1 watch / token 100
-2 start /
-2 mkdir /dir/sub
+[ "`echo -e '1 watch /test token
+2 start /test
+2 mkdir /test/dir/sub
1 waitwatch' | ./xs_test 2>&1`" = "1:waitwatch timeout" ]
-[ "`echo -e '1 watch / token 100
-2 start /
-2 mkdir /dir/sub
+[ "`echo -e '1 watch /test token
+2 start /test
+2 mkdir /test/dir/sub
2 abort
1 waitwatch' | ./xs_test 2>&1`" = "1:waitwatch timeout" ]
-[ "`echo -e '1 watch / token 100
-2 start /
-2 mkdir /dir/sub
-2 commit
+[ "`echo -e '1 watch /test token
+2 start /test
+2 mkdir /test/dir/sub
+2 async commit
1 waitwatch
-1 ackwatch token' | ./xs_test 2>&1`" = "1:/dir/sub:token" ]
+1 ackwatch token' | ./xs_test 2>&1`" = "1:/test/dir/sub:token" ]
# Rm inside transaction works like rm outside: children get notified.
-[ "`echo -e '1 watch /dir/sub token 100
-2 start /
-2 rm /dir
-2 commit
+[ "`echo -e '1 watch /test/dir/sub token
+2 start /test
+2 rm /test/dir
+2 async commit
1 waitwatch
-1 ackwatch token' | ./xs_test 2>&1`" = "1:/dir/sub:token" ]
+1 ackwatch token' | ./xs_test 2>&1`" = "1:/test/dir/sub:token" ]
+
+# Multiple events from a single transaction don't trigger an assert
+[ "`echo -e '1 watch /test token
+2 start /test
+2 write /test/1 create contents
+2 write /test/2 create contents
+2 async commit
+1 waitwatch
+1 ackwatch token
+1 waitwatch
+1 ackwatch token' | ./xs_test 2>&1`" = "1:/test/1:token
+1:/test/2:token" ]
diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/09domain.sh
--- a/tools/xenstore/testsuite/09domain.sh Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/testsuite/09domain.sh Fri Jul 29 20:25:03 2005
@@ -4,8 +4,9 @@
# Create a domain, write an entry.
[ "`echo -e 'introduce 1 100 7 /my/home
1 write /entry1 create contents
-dir /' | ./xs_test 2>&1`" = "handle is 1
-entry1" ]
+dir /' | ./xs_test 2>&1 | sort`" = "entry1
+handle is 1
+tool" ]
# Release that domain.
[ "`echo -e 'release 1' | ./xs_test`" = "" ]
diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/10domain-homedir.sh
--- a/tools/xenstore/testsuite/10domain-homedir.sh Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/testsuite/10domain-homedir.sh Fri Jul 29 20:25:03 2005
@@ -13,8 +13,8 @@
# Place a watch using a relative path: expect relative answer.
[ "`echo 'introduce 1 100 7 /home
1 mkdir foo
-1 watch foo token 0
-write /home/foo/bar create contents
+1 watch foo token
+async write /home/foo/bar create contents
1 waitwatch
1 ackwatch token' | ./xs_test 2>&1`" = "handle is 1
1:foo/bar:token" ]
diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/11domain-watch.sh
--- a/tools/xenstore/testsuite/11domain-watch.sh Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/testsuite/11domain-watch.sh Fri Jul 29 20:25:03 2005
@@ -6,42 +6,46 @@
[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
[ "`echo -e 'introduce 1 100 7 /my/home
-1 watch /test token 100
-write /test create contents2
+1 watch /test token
+async write /test create contents2
1 waitwatch
1 ackwatch token
1 unwatch /test token
+asyncwait
release 1' | ./xs_test 2>&1`" = "handle is 1
1:/test:token" ]
# ignore watches while doing commands, should work.
[ "`echo -e 'introduce 1 100 7 /my/home
-1 watch /dir token 100
-1 write /dir/test create contents
-1 read /dir/test
+1 watch /dir token
+async write /dir/test create contents
+1 write /dir/test2 create contents2
+1 write /dir/test3 create contents3
+1 write /dir/test4 create contents4
1 waitwatch
1 ackwatch token
+asyncwait
release 1' | ./xs_test 2>&1`" = "handle is 1
-1:contents
1:/dir/test:token" ]
# unwatch
[ "`echo -e 'introduce 1 100 7 /my/home
-1 watch /dir token1 0
+1 watch /dir token1
1 unwatch /dir token1
-1 watch /dir token2 0
-2 write /dir/test2 create contents
+1 watch /dir token2
+async 2 write /dir/test2 create contents
1 waitwatch
1 unwatch /dir token2
+asyncwait
release 1' | ./xs_test 2>&1`" = "handle is 1
1:/dir/test2:token2" ]
# unwatch while watch pending.
[ "`echo -e 'introduce 1 100 7 /my/home
introduce 2 101 8 /my/secondhome
-1 watch /dir token1 0
-2 watch /dir token2 1
-write /dir/test create contents
+1 watch /dir token1
+2 watch /dir token2
+3 async write /dir/test create contents
2 unwatch /dir token2
1 waitwatch
1 ackwatch token1
diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/12readonly.sh
--- a/tools/xenstore/testsuite/12readonly.sh Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/testsuite/12readonly.sh Fri Jul 29 20:25:03 2005
@@ -4,16 +4,17 @@
[ "`echo 'write /test create contents' | ./xs_test 2>&1`" = "" ]
# These are all valid.
-[ "`echo 'dir /
-read /test
+[ "`echo dir / | ./xs_test --readonly 2>&1 | sort`" = "test
+tool" ]
+
+[ "`echo 'read /test
getperm /test
-watch /test token 0
+watch /test token
unwatch /test token
start /
commit
start /
-abort' | ./xs_test --readonly 2>&1`" = "test
-contents
+abort' | ./xs_test --readonly 2>&1`" = "contents
0 READ" ]
# These don't work
@@ -26,7 +27,7 @@
# Check that watches work like normal.
set -m
-[ "`echo 'watch / token 0
+[ "`echo 'watch / token
waitwatch
ackwatch token' | ./xs_test --readonly 2>&1`" = "/test:token" ] &
@@ -35,6 +36,3 @@
echo Readonly wait test failed: $?
exit 1
fi
-
-
-
diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/test.sh
--- a/tools/xenstore/testsuite/test.sh Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/testsuite/test.sh Fri Jul 29 20:25:03 2005
@@ -9,7 +9,7 @@
mkdir $XENSTORED_ROOTDIR
# Weird failures with this.
if type valgrind >/dev/null 2>&1; then
-	valgrind -q --logfile-fd=3 ./xenstored_test --output-pid --no-fork 3>testsuite/tmp/vgout > /tmp/pid 2> testsuite/tmp/xenstored_errors &
+	valgrind -q --logfile-fd=3 ./xenstored_test --output-pid --trace-file=testsuite/tmp/trace --no-fork 3>testsuite/tmp/vgout > /tmp/pid 2> testsuite/tmp/xenstored_errors &
while [ ! -s /tmp/pid ]; do sleep 0; done
PID=`cat /tmp/pid`
rm /tmp/pid
@@ -33,12 +33,17 @@
fi
}
+MATCH=${1:-"*"}
for f in testsuite/[0-9]*.sh; do
+ case `basename $f` in $MATCH) RUN=1;; esac
+ [ -n "$RUN" ] || continue
if run_test $f; then
echo Test $f passed...
else
echo Test $f failed, running verbosely...
- run_test $f -x
+ run_test $f -x || true
+ # That will have filled the screen, repeat message.
+ echo Test $f failed
exit 1
fi
done
diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xenstored_core.c Fri Jul 29 20:25:03 2005
@@ -51,7 +51,7 @@
#include "xenstored_domain.h"
static bool verbose;
-static LIST_HEAD(connections);
+LIST_HEAD(connections);
static int tracefd = -1;
#ifdef TESTING
@@ -111,6 +111,8 @@
str = talloc_vasprintf(NULL, fmt, arglist);
va_end(arglist);
+ trace("xenstored corruption: connection id %i: err %s: %s",
+ conn ? (int)conn->id : -1, strerror(saved_errno), str);
eprintf("xenstored corruption: connection id %i: err %s: %s",
conn ? (int)conn->id : -1, strerror(saved_errno), str);
#ifdef TESTING
@@ -230,6 +232,21 @@
write(tracefd, ")\n", 2);
}
+void trace(const char *fmt, ...)
+{
+ va_list arglist;
+ char *str;
+
+ if (tracefd < 0)
+ return;
+
+ va_start(arglist, fmt);
+ str = talloc_vasprintf(NULL, fmt, arglist);
+ va_end(arglist);
+ write(tracefd, str, strlen(str));
+ talloc_free(str);
+}
+
static bool write_message(struct connection *conn)
{
int ret;
@@ -253,7 +270,7 @@
out->used = 0;
/* Second write might block if non-zero. */
- if (out->hdr.msg.len)
+ if (out->hdr.msg.len && !conn->domain)
return true;
}
@@ -318,7 +335,7 @@
list_for_each_entry(i, &connections, list) {
if (i->domain)
continue;
- if (!i->blocked)
+ if (i->state == OK)
FD_SET(i->fd, inset);
if (i->out)
FD_SET(i->fd, outset);
@@ -454,8 +471,7 @@
return i;
}
-/* Returns "false", meaning "connection is not blocked". */
-bool send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
const void *data, unsigned int len)
{
struct buffered_data *bdata;
@@ -476,16 +492,15 @@
conn->waiting_reply = bdata;
} else
conn->out = bdata;
- return false;
}
/* Some routines (write, mkdir, etc) just need a non-error return */
-bool send_ack(struct connection *conn, enum xsd_sockmsg_type type)
-{
- return send_reply(conn, type, "OK", sizeof("OK"));
-}
-
-bool send_error(struct connection *conn, int error)
+void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
+{
+ send_reply(conn, type, "OK", sizeof("OK"));
+}
+
+void send_error(struct connection *conn, int error)
{
unsigned int i;
@@ -494,7 +509,7 @@
corrupt(conn, "Unknown error %i (%s)", error,
strerror(error));
- return send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
+ send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
strlen(xsd_errors[i].errstring) + 1);
}
@@ -780,7 +795,7 @@
return false;
}
-static bool send_directory(struct connection *conn, const char *node)
+static void send_directory(struct connection *conn, const char *node)
{
char *path, *reply = talloc_strdup(node, "");
unsigned int reply_len = 0;
@@ -788,13 +803,17 @@
struct dirent *dirent;
node = canonicalize(conn, node);
- if (!check_node_perms(conn, node, XS_PERM_READ))
- return send_error(conn, errno);
+ if (!check_node_perms(conn, node, XS_PERM_READ)) {
+ send_error(conn, errno);
+ return;
+ }
path = node_dir(conn->transaction, node);
dir = talloc_opendir(path);
- if (!dir)
- return send_error(conn, errno);
+ if (!dir) {
+ send_error(conn, errno);
+ return;
+ }
while ((dirent = readdir(*dir)) != NULL) {
int len = strlen(dirent->d_name) + 1;
@@ -807,32 +826,35 @@
reply_len += len;
}
- return send_reply(conn, XS_DIRECTORY, reply, reply_len);
-}
-
-static bool do_read(struct connection *conn, const char *node)
+ send_reply(conn, XS_DIRECTORY, reply, reply_len);
+}
+
+static void do_read(struct connection *conn, const char *node)
{
char *value;
unsigned int size;
int *fd;
node = canonicalize(conn, node);
- if (!check_node_perms(conn, node, XS_PERM_READ))
- return send_error(conn, errno);
+ if (!check_node_perms(conn, node, XS_PERM_READ)) {
+ send_error(conn, errno);
+ return;
+ }
fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0);
if (!fd) {
/* Data file doesn't exist? We call that a directory */
if (errno == ENOENT)
errno = EISDIR;
- return send_error(conn, errno);
+ send_error(conn, errno);
+ return;
}
value = read_all(fd, &size);
if (!value)
- return send_error(conn, errno);
-
- return send_reply(conn, XS_READ, value, size);
+ send_error(conn, errno);
+ else
+ send_reply(conn, XS_READ, value, size);
}
/* Create a new directory. Optionally put data in it (if data != NULL) */
@@ -876,7 +898,7 @@
}
/* path, flags, data... */
-static bool do_write(struct connection *conn, struct buffered_data *in)
+static void do_write(struct connection *conn, struct buffered_data *in)
{
unsigned int offset, datalen;
char *vec[2];
@@ -885,16 +907,20 @@
struct stat st;
/* Extra "strings" can be created by binary data. */
- if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
- return send_error(conn, EINVAL);
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
+ send_error(conn, EINVAL);
+ return;
+ }
node = canonicalize(conn, vec[0]);
if (/*suppress error on write outside transaction*/ 0 &&
- !within_transaction(conn->transaction, node))
- return send_error(conn, EROFS);
+ !within_transaction(conn->transaction, node)) {
+ send_error(conn, EROFS);
+ return;
+ }
if (transaction_block(conn, node))
- return true;
+ return;
offset = strlen(vec[0]) + strlen(vec[1]) + 2;
datalen = in->used - offset;
@@ -905,196 +931,243 @@
mode = XS_PERM_WRITE|XS_PERM_CREATE;
else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
mode = XS_PERM_WRITE|XS_PERM_CREATE;
- else
- return send_error(conn, EINVAL);
-
- if (!check_node_perms(conn, node, mode))
- return send_error(conn, errno);
+ else {
+ send_error(conn, EINVAL);
+ return;
+ }
+
+ if (!check_node_perms(conn, node, mode)) {
+ send_error(conn, errno);
+ return;
+ }
if (lstat(node_dir(conn->transaction, node), &st) != 0) {
/* Does not exist... */
- if (errno != ENOENT)
- return send_error(conn, errno);
+ if (errno != ENOENT) {
+ send_error(conn, errno);
+ return;
+ }
/* Not going to create it? */
- if (!(mode & XS_PERM_CREATE))
- return send_error(conn, ENOENT);
-
- if (!new_directory(conn, node, in->buffer + offset, datalen))
- return send_error(conn, errno);
+ if (!(mode & XS_PERM_CREATE)) {
+ send_error(conn, ENOENT);
+ return;
+ }
+
+ if (!new_directory(conn, node, in->buffer + offset, datalen)) {
+ send_error(conn, errno);
+ return;
+ }
} else {
/* Exists... */
- if (streq(vec[1], XS_WRITE_CREATE_EXCL))
- return send_error(conn, EEXIST);
+ if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
+ send_error(conn, EEXIST);
+ return;
+ }
tmppath = tempfile(node_datafile(conn->transaction, node),
in->buffer + offset, datalen);
- if (!tmppath)
- return send_error(conn, errno);
+ if (!tmppath) {
+ send_error(conn, errno);
+ return;
+ }
commit_tempfile(tmppath);
}
add_change_node(conn->transaction, node, false);
+ fire_watches(conn, node, false);
send_ack(conn, XS_WRITE);
- fire_watches(conn->transaction, node, false);
- return false;
-}
-
-static bool do_mkdir(struct connection *conn, const char *node)
+}
+
+static void do_mkdir(struct connection *conn, const char *node)
{
node = canonicalize(conn, node);
- if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE))
- return send_error(conn, errno);
-
- if (!within_transaction(conn->transaction, node))
- return send_error(conn, EROFS);
+ if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE)) {
+ send_error(conn, errno);
+ return;
+ }
+
+ if (!within_transaction(conn->transaction, node)) {
+ send_error(conn, EROFS);
+ return;
+ }
if (transaction_block(conn, node))
- return true;
-
- if (!new_directory(conn, node, NULL, 0))
- return send_error(conn, errno);
+ return;
+
+ if (!new_directory(conn, node, NULL, 0)) {
+ send_error(conn, errno);
+ return;
+ }
add_change_node(conn->transaction, node, false);
+ fire_watches(conn, node, false);
send_ack(conn, XS_MKDIR);
- fire_watches(conn->transaction, node, false);
- return false;
-}
-
-static bool do_rm(struct connection *conn, const char *node)
+}
+
+static void do_rm(struct connection *conn, const char *node)
{
char *tmppath, *path;
node = canonicalize(conn, node);
- if (!check_node_perms(conn, node, XS_PERM_WRITE))
- return send_error(conn, errno);
-
- if (!within_transaction(conn->transaction, node))
- return send_error(conn, EROFS);
+ if (!check_node_perms(conn, node, XS_PERM_WRITE)) {
+ send_error(conn, errno);
+ return;
+ }
+
+ if (!within_transaction(conn->transaction, node)) {
+ send_error(conn, EROFS);
+ return;
+ }
if (transaction_block(conn, node))
- return true;
-
- if (streq(node, "/"))
- return send_error(conn, EINVAL);
+ return;
+
+ if (streq(node, "/")) {
+ send_error(conn, EINVAL);
+ return;
+ }
/* We move the directory to temporary name, destructor cleans up. */
path = node_dir(conn->transaction, node);
tmppath = talloc_asprintf(node, "%s.tmp", path);
talloc_set_destructor(tmppath, destroy_path);
- if (rename(path, tmppath) != 0)
- return send_error(conn, errno);
+ if (rename(path, tmppath) != 0) {
+ send_error(conn, errno);
+ return;
+ }
add_change_node(conn->transaction, node, true);
+ fire_watches(conn, node, true);
send_ack(conn, XS_RM);
- fire_watches(conn->transaction, node, true);
- return false;
-}
-
-static bool do_get_perms(struct connection *conn, const char *node)
+}
+
+static void do_get_perms(struct connection *conn, const char *node)
{
struct xs_permissions *perms;
char *strings;
unsigned int len, num;
node = canonicalize(conn, node);
- if (!check_node_perms(conn, node, XS_PERM_READ))
- return send_error(conn, errno);
+ if (!check_node_perms(conn, node, XS_PERM_READ)) {
+ send_error(conn, errno);
+ return;
+ }
perms = get_perms(conn->transaction, node, &num);
- if (!perms)
- return send_error(conn, errno);
+ if (!perms) {
+ send_error(conn, errno);
+ return;
+ }
strings = perms_to_strings(node, perms, num, &len);
if (!strings)
- return send_error(conn, errno);
-
- return send_reply(conn, XS_GET_PERMS, strings, len);
-}
-
-static bool do_set_perms(struct connection *conn, struct buffered_data *in)
+ send_error(conn, errno);
+ else
+ send_reply(conn, XS_GET_PERMS, strings, len);
+}
+
+static void do_set_perms(struct connection *conn, struct buffered_data *in)
{
unsigned int num;
char *node;
struct xs_permissions *perms;
num = xs_count_strings(in->buffer, in->used);
- if (num < 2)
- return send_error(conn, EINVAL);
+ if (num < 2) {
+ send_error(conn, EINVAL);
+ return;
+ }
/* First arg is node name. */
node = canonicalize(conn, in->buffer);
in->buffer += strlen(in->buffer) + 1;
num--;
- if (!within_transaction(conn->transaction, node))
- return send_error(conn, EROFS);
+ if (!within_transaction(conn->transaction, node)) {
+ send_error(conn, EROFS);
+ return;
+ }
if (transaction_block(conn, node))
- return true;
+ return;
/* We must own node to do this (tools can do this too). */
- if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER))
- return send_error(conn, errno);
+ if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) {
+ send_error(conn, errno);
+ return;
+ }
perms = talloc_array(node, struct xs_permissions, num);
- if (!xs_strings_to_perms(perms, num, in->buffer))
- return send_error(conn, errno);
-
- if (!set_perms(conn->transaction, node, perms, num))
- return send_error(conn, errno);
+ if (!xs_strings_to_perms(perms, num, in->buffer)) {
+ send_error(conn, errno);
+ return;
+ }
+
+ if (!set_perms(conn->transaction, node, perms, num)) {
+ send_error(conn, errno);
+ return;
+ }
+
add_change_node(conn->transaction, node, false);
+ fire_watches(conn, node, false);
send_ack(conn, XS_SET_PERMS);
- fire_watches(conn->transaction, node, false);
- return false;
}
/* Process "in" for conn: "in" will vanish after this conversation, so
* we can talloc off it for temporary variables. May free "conn".
- * Returns true if can't complete due to block.
*/
-static bool process_message(struct connection *conn, struct buffered_data *in)
+static void process_message(struct connection *conn, struct buffered_data *in)
{
switch (in->hdr.msg.type) {
case XS_DIRECTORY:
- return send_directory(conn, onearg(in));
+ send_directory(conn, onearg(in));
+ break;
case XS_READ:
- return do_read(conn, onearg(in));
+ do_read(conn, onearg(in));
+ break;
case XS_WRITE:
- return do_write(conn, in);
+ do_write(conn, in);
+ break;
case XS_MKDIR:
- return do_mkdir(conn, onearg(in));
+ do_mkdir(conn, onearg(in));
+ break;
case XS_RM:
- return do_rm(conn, onearg(in));
+ do_rm(conn, onearg(in));
+ break;
case XS_GET_PERMS:
- return do_get_perms(conn, onearg(in));
+ do_get_perms(conn, onearg(in));
+ break;
case XS_SET_PERMS:
- return do_set_perms(conn, in);
+ do_set_perms(conn, in);
+ break;
case XS_SHUTDOWN:
/* FIXME: Implement gentle shutdown too. */
/* Only tools can do this. */
- if (conn->id != 0)
- return send_error(conn, EACCES);
- if (!conn->can_write)
- return send_error(conn, EROFS);
+ if (conn->id != 0) {
+ send_error(conn, EACCES);
+ break;
+ }
+ if (!conn->can_write) {
+ send_error(conn, EROFS);
+ break;
+ }
send_ack(conn, XS_SHUTDOWN);
/* Everything hangs off auto-free context, freed at exit. */
exit(0);
case XS_DEBUG:
- if (streq(in->buffer, "print")) {
+ if (streq(in->buffer, "print"))
xprintf("debug: %s", in->buffer + get_string(in, 0));
- return false;
- }
#ifdef TESTING
/* For testing, we allow them to set id. */
if (streq(in->buffer, "setid")) {
@@ -1107,37 +1180,44 @@
failtest = true;
}
#endif /* TESTING */
- return false;
+ break;
case XS_WATCH:
- return do_watch(conn, in);
+ do_watch(conn, in);
+ break;
case XS_WATCH_ACK:
- return do_watch_ack(conn, onearg(in));
+ do_watch_ack(conn, onearg(in));
+ break;
case XS_UNWATCH:
- return do_unwatch(conn, in);
+ do_unwatch(conn, in);
+ break;
case XS_TRANSACTION_START:
- return do_transaction_start(conn, onearg(in));
+ do_transaction_start(conn, onearg(in));
+ break;
case XS_TRANSACTION_END:
- return do_transaction_end(conn, onearg(in));
+ do_transaction_end(conn, onearg(in));
+ break;
case XS_INTRODUCE:
- return do_introduce(conn, in);
+ do_introduce(conn, in);
+ break;
case XS_RELEASE:
- return do_release(conn, onearg(in));
+ do_release(conn, onearg(in));
+ break;
case XS_GETDOMAINPATH:
- return do_get_domain_path(conn, onearg(in));
+ do_get_domain_path(conn, onearg(in));
+ break;
case XS_WATCH_EVENT:
default:
eprintf("Client unknown operation %i", in->hdr.msg.type);
send_error(conn, ENOSYS);
- return false;
}
}
@@ -1151,6 +1231,8 @@
struct buffered_data *in = NULL;
enum xsd_sockmsg_type type = conn->in->hdr.msg.type;
jmp_buf talloc_fail;
+
+ assert(conn->state == OK);
/* For simplicity, we kill the connection on OOM. */
talloc_set_fail_handler(out_of_mem, &talloc_fail);
@@ -1166,7 +1248,7 @@
/* We might get a command while waiting for an ack: this means
* the other end discarded it: we will re-transmit. */
if (type != XS_WATCH_ACK)
- conn->waiting_for_ack = false;
+ conn->waiting_for_ack = NULL;
/* Careful: process_message may free connection. We detach
* "in" beforehand and allocate the new buffer to avoid
@@ -1174,7 +1256,9 @@
*/
in = talloc_steal(talloc_autofree_context(), conn->in);
conn->in = new_buffer(conn);
- if (process_message(conn, in)) {
+ process_message(conn, in);
+
+ if (conn->state == BLOCKED) {
/* Blocked by transaction: queue for re-xmit. */
talloc_free(conn->in);
conn->in = in;
@@ -1197,7 +1281,7 @@
int bytes;
struct buffered_data *in;
- assert(!conn->blocked);
+ assert(conn->state == OK);
in = conn->in;
/* Not finished header yet? */
@@ -1254,13 +1338,17 @@
struct connection *i, *tmp;
list_for_each_entry_safe(i, tmp, &connections, list) {
- if (!i->blocked)
- continue;
-
- if (!transaction_covering_node(i->blocked)) {
- talloc_free(i->blocked);
- i->blocked = NULL;
- consider_message(i);
+ switch (i->state) {
+ case BLOCKED:
+ if (!transaction_covering_node(i->blocked_by)) {
+ talloc_free(i->blocked_by);
+ i->blocked_by = NULL;
+ i->state = OK;
+ consider_message(i);
+ }
+ break;
+ case OK:
+ break;
}
}
@@ -1281,7 +1369,8 @@
if (!new)
return NULL;
- new->blocked = false;
+ new->state = OK;
+ new->blocked_by = NULL;
new->out = new->waiting_reply = NULL;
new->fd = -1;
new->id = 0;
@@ -1290,6 +1379,7 @@
new->write = write;
new->read = read;
new->can_write = true;
+ INIT_LIST_HEAD(&new->watches);
talloc_set_fail_handler(out_of_mem, &talloc_fail);
if (setjmp(talloc_fail)) {
@@ -1358,12 +1448,14 @@
list_for_each_entry(i, &connections, list) {
printf("Connection %p:\n", i);
+ printf(" state = %s\n",
+ i->state == OK ? "OK"
+ : i->state == BLOCKED ? "BLOCKED"
+ : "INVALID");
if (i->id)
printf(" id = %i\n", i->id);
- if (i->blocked)
- printf(" blocked on = %s\n", i->blocked);
- if (i->waiting_for_ack)
- printf(" waiting_for_ack TRUE\n");
+ if (i->blocked_by)
+ printf(" blocked on = %s\n", i->blocked_by);
if (!i->in->inhdr || i->in->used)
printf(" got %i bytes of %s\n",
i->in->used, i->in->inhdr ? "header" : "data");
@@ -1385,6 +1477,44 @@
}
}
#endif
+
+static void setup_structure(void)
+{
+ struct xs_permissions perms = { .id = 0, .perms = XS_PERM_READ };
+ char *root, *dir, *permfile;
+
+ /* Create root directory, with permissions. */
+ if (mkdir(xs_daemon_store(), 0750) != 0) {
+ if (errno != EEXIST)
+ barf_perror("Could not create root %s",
+ xs_daemon_store());
+ return;
+ }
+ root = talloc_strdup(talloc_autofree_context(), "/");
+ if (!set_perms(NULL, root, &perms, 1))
+ barf_perror("Could not create permissions in root");
+
+ /* Create tool directory, with xenstored subdir. */
+ dir = talloc_asprintf(root, "%s/%s", xs_daemon_store(), "tool");
+ if (mkdir(dir, 0750) != 0)
+ barf_perror("Making dir %s", dir);
+
+ permfile = talloc_strdup(root, "/tool");
+ if (!set_perms(NULL, permfile, &perms, 1))
+ barf_perror("Could not create permissions on %s", permfile);
+
+ dir = talloc_asprintf(root, "%s/%s", dir, "xenstored");
+ if (mkdir(dir, 0750) != 0)
+ barf_perror("Making dir %s", dir);
+
+ permfile = talloc_strdup(root, "/tool/xenstored");
+ if (!set_perms(NULL, permfile, &perms, 1))
+ barf_perror("Could not create permissions on %s", permfile);
+ talloc_free(root);
+ if (mkdir(xs_daemon_transactions(), 0750) != 0)
+ barf_perror("Could not create transaction dir %s",
+ xs_daemon_transactions());
+}
static struct option options[] = { { "no-fork", 0, NULL, 'N' },
{ "verbose", 0, NULL, 'V' },
@@ -1461,21 +1591,13 @@
barf_perror("Could not listen on sockets");
/* If we're the first, create .perms file for root. */
- if (mkdir(xs_daemon_store(), 0750) == 0) {
- struct xs_permissions perms;
- char *root = talloc_strdup(talloc_autofree_context(), "/");
-
- perms.id = 0;
- perms.perms = XS_PERM_READ;
- if (!set_perms(NULL, root, &perms, 1))
- barf_perror("Could not create permissions in root");
- talloc_free(root);
- mkdir(xs_daemon_transactions(), 0750);
- } else if (errno != EEXIST)
- barf_perror("Could not create root %s", xs_daemon_store());
+ setup_structure();
/* Listen to hypervisor. */
event_fd = domain_init();
+
+ /* Restore existing connections. */
+ restore_existing_connections();
/* Debugging: daemonize() closes standard fds, so dup here. */
tmpout = dup(STDOUT_FILENO);
diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xenstored_core.h Fri Jul 29 20:25:03 2005
@@ -47,6 +47,14 @@
typedef int connwritefn_t(struct connection *, const void *, unsigned int);
typedef int connreadfn_t(struct connection *, void *, unsigned int);
+enum state
+{
+ /* Blocked by transaction. */
+ BLOCKED,
+ /* Completed */
+ OK,
+};
+
struct connection
{
struct list_head list;
@@ -57,14 +65,17 @@
/* Who am I? 0 for socket connections. */
domid_t id;
- /* Are we blocked waiting for a transaction to end? Contains node. */
- char *blocked;
+ /* Blocked on transaction? */
+ enum state state;
+
+ /* Node we are waiting for (if state == BLOCKED) */
+ char *blocked_by;
/* Is this a read-only connection? */
bool can_write;
/* Are we waiting for a watch event ack? */
- bool waiting_for_ack;
+ struct watch *waiting_for_ack;
/* Buffered incoming data. */
struct buffered_data *in;
@@ -81,10 +92,14 @@
/* The domain I'm associated with, if any. */
struct domain *domain;
+ /* My watches. */
+ struct list_head watches;
+
/* Methods for communicating over this connection: write can be NULL */
connwritefn_t *write;
connreadfn_t *read;
};
+extern struct list_head connections;
/* Return length of string (including nul) at this offset. */
unsigned int get_string(const struct buffered_data *data,
@@ -100,14 +115,14 @@
/* Create a new buffer with lifetime of context. */
struct buffered_data *new_buffer(void *ctx);
-bool send_reply(struct connection *conn, enum xsd_sockmsg_type type,
- const void *data, unsigned int len);
+void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len);
/* Some routines (write, mkdir, etc) just need a non-error return */
-bool send_ack(struct connection *conn, enum xsd_sockmsg_type type);
+void send_ack(struct connection *conn, enum xsd_sockmsg_type type);
/* Send an error: error is usually "errno". */
-bool send_error(struct connection *conn, int error);
+void send_error(struct connection *conn, int error);
/* Canonicalize this path if possible. */
char *canonicalize(struct connection *conn, const char *node);
@@ -147,5 +162,6 @@
void trace_create(const void *data, const char *type);
void trace_destroy(const void *data, const char *type);
void trace_watch_timeout(const struct connection *conn, const char *node,
const char *token);
+void trace(const char *fmt, ...);
#endif /* _XENSTORED_CORE_H */
diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xenstored_domain.c Fri Jul 29 20:25:03 2005
@@ -239,7 +239,8 @@
* careful that handle_input/handle_output can destroy conn.
*/
while ((domain = find_domain(port)) != NULL) {
- if (!domain->conn->blocked && buffer_has_input(domain->input))
+ if (domain->conn->state == OK
+ && buffer_has_input(domain->input))
handle_input(domain->conn);
else if (domain->conn->out
&& buffer_has_output_room(domain->output))
@@ -254,34 +255,21 @@
#endif
}
-/* domid, mfn, evtchn, path */
-bool do_introduce(struct connection *conn, struct buffered_data *in)
+static struct domain *new_domain(void *context, domid_t domid,
+ unsigned long mfn, int port,
+ const char *path)
{
struct domain *domain;
- char *vec[4];
-
- if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
- return send_error(conn, EINVAL);
-
- if (conn->id != 0)
- return send_error(conn, EACCES);
-
- if (!conn->can_write)
- return send_error(conn, EROFS);
-
- /* Hang domain off "in" until we're finished. */
- domain = talloc(in, struct domain);
- domain->domid = atoi(vec[0]);
- domain->port = atoi(vec[2]);
- if ((domain->port <= 0) || !is_valid_nodename(vec[3]))
- return send_error(conn, EINVAL);
- domain->path = talloc_strdup(domain, vec[3]);
+ domain = talloc(context, struct domain);
+ domain->port = 0;
+ domain->domid = domid;
+ domain->path = talloc_strdup(domain, path);
domain->page = xc_map_foreign_range(*xc_handle, domain->domid,
getpagesize(),
PROT_READ|PROT_WRITE,
- atol(vec[1]));
+ mfn);
if (!domain->page)
- return send_error(conn, errno);
+ return NULL;
list_add(&domain->list, &domains);
talloc_set_destructor(domain, destroy_domain);
@@ -291,15 +279,52 @@
domain->output = domain->page + getpagesize()/2;
/* Tell kernel we're interested in this event. */
- if (ioctl(eventchn_fd, EVENTCHN_BIND, domain->port) != 0)
- return send_error(conn, errno);
-
+ if (ioctl(eventchn_fd, EVENTCHN_BIND, port) != 0)
+ return NULL;
+
+ domain->port = port;
domain->conn = new_connection(writechn, readchn);
domain->conn->domain = domain;
-
+ return domain;
+}
+
+/* domid, mfn, evtchn, path */
+void do_introduce(struct connection *conn, struct buffered_data *in)
+{
+ struct domain *domain;
+ char *vec[4];
+
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
+ send_error(conn, EINVAL);
+ return;
+ }
+
+ if (conn->id != 0) {
+ send_error(conn, EACCES);
+ return;
+ }
+
+ if (!conn->can_write) {
+ send_error(conn, EROFS);
+ return;
+ }
+
+ /* Sanity check args. */
+ if ((atoi(vec[2]) <= 0) || !is_valid_nodename(vec[3])) {
+ send_error(conn, EINVAL);
+ return;
+ }
+ /* Hang domain off "in" until we're finished. */
+ domain = new_domain(in, atoi(vec[0]), atol(vec[1]), atol(vec[2]),
+ vec[3]);
+ if (!domain) {
+ send_error(conn, errno);
+ return;
+ }
+
+ /* Now domain belongs to its connection. */
talloc_steal(domain->conn, domain);
-
- return send_ack(conn, XS_INTRODUCE);
+ send_ack(conn, XS_INTRODUCE);
}
static struct domain *find_domain_by_domid(domid_t domid)
@@ -314,39 +339,51 @@
}
/* domid */
-bool do_release(struct connection *conn, const char *domid_str)
+void do_release(struct connection *conn, const char *domid_str)
{
struct domain *domain;
domid_t domid;
- if (!domid_str)
- return send_error(conn, EINVAL);
+ if (!domid_str) {
+ send_error(conn, EINVAL);
+ return;
+ }
domid = atoi(domid_str);
- if (!domid)
- return send_error(conn, EINVAL);
-
- if (conn->id != 0)
- return send_error(conn, EACCES);
+ if (!domid) {
+ send_error(conn, EINVAL);
+ return;
+ }
+
+ if (conn->id != 0) {
+ send_error(conn, EACCES);
+ return;
+ }
domain = find_domain_by_domid(domid);
- if (!domain)
- return send_error(conn, ENOENT);
-
- if (!domain->conn)
- return send_error(conn, EINVAL);
+ if (!domain) {
+ send_error(conn, ENOENT);
+ return;
+ }
+
+ if (!domain->conn) {
+ send_error(conn, EINVAL);
+ return;
+ }
talloc_free(domain->conn);
- return send_ack(conn, XS_RELEASE);
-}
-
-bool do_get_domain_path(struct connection *conn, const char *domid_str)
+ send_ack(conn, XS_RELEASE);
+}
+
+void do_get_domain_path(struct connection *conn, const char *domid_str)
{
struct domain *domain;
domid_t domid;
- if (!domid_str)
- return send_error(conn, EINVAL);
+ if (!domid_str) {
+ send_error(conn, EINVAL);
+ return;
+ }
domid = atoi(domid_str);
if (domid == DOMID_SELF)
@@ -354,11 +391,11 @@
else
domain = find_domain_by_domid(domid);
- if (!domain)
- return send_error(conn, ENOENT);
-
- return send_reply(conn, XS_GETDOMAINPATH, domain->path,
- strlen(domain->path) + 1);
+ if (!domain)
+ send_error(conn, ENOENT);
+ else
+ send_reply(conn, XS_GETDOMAINPATH, domain->path,
+ strlen(domain->path) + 1);
}
static int close_xc_handle(void *_handle)
@@ -373,6 +410,11 @@
if (!conn->domain)
return NULL;
return conn->domain->path;
+}
+
+/* Restore existing connections. */
+void restore_existing_connections(void)
+{
}
/* Returns the event channel handle. */
diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_domain.h
--- a/tools/xenstore/xenstored_domain.h Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xenstored_domain.h Fri Jul 29 20:25:03 2005
@@ -23,13 +23,13 @@
void handle_event(int event_fd);
/* domid, mfn, eventchn, path */
-bool do_introduce(struct connection *conn, struct buffered_data *in);
+void do_introduce(struct connection *conn, struct buffered_data *in);
/* domid */
-bool do_release(struct connection *conn, const char *domid_str);
+void do_release(struct connection *conn, const char *domid_str);
/* domid */
-bool do_get_domain_path(struct connection *conn, const char *domid_str);
+void do_get_domain_path(struct connection *conn, const char *domid_str);
/* Returns the event channel handle */
int domain_init(void);
@@ -37,4 +37,7 @@
/* Returns the implicit path of a connection (only domains have this) */
const char *get_implicit_path(const struct connection *conn);
+/* Read existing connection information from store. */
+void restore_existing_connections(void);
+
#endif /* _XENSTORED_DOMAIN_H */
diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_transaction.c
--- a/tools/xenstore/xenstored_transaction.c Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xenstored_transaction.c Fri Jul 29 20:25:03 2005
@@ -114,7 +114,8 @@
trans = transaction_covering_node(node);
if (trans) {
start_transaction_timeout(trans);
- conn->blocked = talloc_strdup(conn, node);
+ conn->state = BLOCKED;
+ conn->blocked_by = talloc_strdup(conn, node);
return true;
}
return false;
@@ -239,20 +240,24 @@
return true;
}
-bool do_transaction_start(struct connection *conn, const char *node)
+void do_transaction_start(struct connection *conn, const char *node)
{
struct transaction *transaction;
char *dir;
- if (conn->transaction)
- return send_error(conn, EBUSY);
+ if (conn->transaction) {
+ send_error(conn, EBUSY);
+ return;
+ }
node = canonicalize(conn, node);
- if (!check_node_perms(conn, node, XS_PERM_READ))
- return send_error(conn, errno);
+ if (!check_node_perms(conn, node, XS_PERM_READ)) {
+ send_error(conn, errno);
+ return;
+ }
if (transaction_block(conn, node))
- return true;
+ return;
dir = node_dir_outside_transaction(node);
@@ -270,18 +275,19 @@
talloc_set_destructor(transaction, destroy_transaction);
trace_create(transaction, "transaction");
- if (!copy_dir(dir, transaction->divert))
- return send_error(conn, errno);
+ if (!copy_dir(dir, transaction->divert)) {
+ send_error(conn, errno);
+ return;
+ }
talloc_steal(conn, transaction);
conn->transaction = transaction;
- return send_ack(transaction->conn, XS_TRANSACTION_START);
+ send_ack(transaction->conn, XS_TRANSACTION_START);
}
static bool commit_transaction(struct transaction *trans)
{
char *tmp, *dir;
- struct changed_node *i;
/* Move: orig -> .old, repl -> orig. Cleanup deletes .old. */
dir = node_dir_outside_transaction(trans->node);
@@ -294,39 +300,44 @@
trans->divert, dir);
trans->divert = tmp;
-
- /* Fire off the watches for everything that changed. */
- list_for_each_entry(i, &trans->changes, list)
- fire_watches(NULL, i->node, i->recurse);
return true;
}
-bool do_transaction_end(struct connection *conn, const char *arg)
-{
- if (!arg || (!streq(arg, "T") && !streq(arg, "F")))
- return send_error(conn, EINVAL);
-
- if (!conn->transaction)
- return send_error(conn, ENOENT);
+void do_transaction_end(struct connection *conn, const char *arg)
+{
+ struct changed_node *i;
+ struct transaction *trans;
+
+ if (!arg || (!streq(arg, "T") && !streq(arg, "F"))) {
+ send_error(conn, EINVAL);
+ return;
+ }
+
+ if (!conn->transaction) {
+ send_error(conn, ENOENT);
+ return;
+ }
+
+ /* Set to NULL so fire_watches sends events. */
+ trans = conn->transaction;
+ conn->transaction = NULL;
+ /* Attach transaction to arg for auto-cleanup */
+ talloc_steal(arg, trans);
if (streq(arg, "T")) {
- if (conn->transaction->destined_to_fail) {
+ if (trans->destined_to_fail) {
send_error(conn, ETIMEDOUT);
- goto failed;
+ return;
}
- if (!commit_transaction(conn->transaction)) {
+ if (!commit_transaction(trans)) {
send_error(conn, errno);
- goto failed;
+ return;
}
- }
-
- talloc_free(conn->transaction);
- conn->transaction = NULL;
- return send_ack(conn, XS_TRANSACTION_END);
-
-failed:
- talloc_free(conn->transaction);
- conn->transaction = NULL;
- return false;
-}
-
+
+ /* Fire off the watches for everything that changed. */
+ list_for_each_entry(i, &trans->changes, list)
+ fire_watches(conn, i->node, i->recurse);
+ }
+ send_ack(conn, XS_TRANSACTION_END);
+}
+
diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_transaction.h
--- a/tools/xenstore/xenstored_transaction.h Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xenstored_transaction.h Fri Jul 29 20:25:03 2005
@@ -22,8 +22,8 @@
struct transaction;
-bool do_transaction_start(struct connection *conn, const char *node);
-bool do_transaction_end(struct connection *conn, const char *arg);
+void do_transaction_start(struct connection *conn, const char *node);
+void do_transaction_end(struct connection *conn, const char *arg);
/* Is node covered by this transaction? */
bool within_transaction(struct transaction *trans, const char *node);
diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_watch.c
--- a/tools/xenstore/xenstored_watch.c Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xenstored_watch.c Fri Jul 29 20:25:03 2005
@@ -33,76 +33,43 @@
#include "xenstored_domain.h"
/* FIXME: time out unacked watches. */
-
-/* We create this if anyone is interested "node", then we pass it from
- * watch to watch as each connection acks it.
- */
struct watch_event
{
- /* The watch we are firing for (watch->events) */
+ /* The events on this watch. */
struct list_head list;
- /* Watches we need to fire for (watches[0]->events == this). */
- struct watch **watches;
- unsigned int num_watches;
-
- struct timeval timeout;
-
- /* Name of node which changed. */
- char *node;
-
- /* For remove, we trigger on all the children of this node too. */
- bool recurse;
+ /* Data to send (node\0token\0). */
+ unsigned int len;
+ char *data;
};
struct watch
{
+ /* Watches on this connection */
struct list_head list;
- unsigned int priority;
/* Current outstanding events applying to this watch. */
struct list_head events;
/* Is this relative to connection's implicit path? */
- bool relative;
+ const char *relative_path;
char *token;
char *node;
- struct connection *conn;
};
-static LIST_HEAD(watches);
-
-static struct watch_event *get_first_event(struct connection *conn)
-{
- struct watch *watch;
- struct watch_event *event;
-
- /* Find first watch with an event. */
- list_for_each_entry(watch, &watches, list) {
- if (watch->conn != conn)
- continue;
-
- event = list_top(&watch->events, struct watch_event, list);
- if (event)
- return event;
- }
- return NULL;
-}
/* Look through our watches: if any of them have an event, queue it. */
void queue_next_event(struct connection *conn)
{
struct watch_event *event;
- const char *node;
- char *buffer;
- unsigned int len;
+ struct watch *watch;
/* We had a reply queued already? Send it: other end will
* discard watch. */
if (conn->waiting_reply) {
conn->out = conn->waiting_reply;
conn->waiting_reply = NULL;
- conn->waiting_for_ack = false;
+ conn->waiting_for_ack = NULL;
return;
}
@@ -110,170 +77,83 @@
if (conn->waiting_for_ack)
return;
- event = get_first_event(conn);
- if (!event)
- return;
-
- /* If we decide to cancel, we will reset this. */
- conn->waiting_for_ack = true;
-
- /* If we deleted /foo and they're watching /foo/bar, that's what we
- * tell them has changed. */
- if (!is_child(event->node, event->watches[0]->node)) {
- assert(event->recurse);
- node = event->watches[0]->node;
- } else
- node = event->node;
-
- /* If watch placed using relative path, give them relative answer. */
- if (event->watches[0]->relative) {
- node += strlen(get_implicit_path(conn));
- if (node[0] == '/') /* Could be "". */
+ list_for_each_entry(watch, &conn->watches, list) {
+ event = list_top(&watch->events, struct watch_event, list);
+ if (event) {
+ conn->waiting_for_ack = watch;
+ send_reply(conn,XS_WATCH_EVENT,event->data,event->len);
+ break;
+ }
+ }
+}
+
+static int destroy_watch_event(void *_event)
+{
+ struct watch_event *event = _event;
+
+ trace_destroy(event, "watch_event");
+ return 0;
+}
+
+static void add_event(struct watch *watch, const char *node)
+{
+ struct watch_event *event;
+
+ if (watch->relative_path) {
+ node += strlen(watch->relative_path);
+ if (*node == '/') /* Could be "" */
node++;
}
- /* Create reply from path and token */
- len = strlen(node) + 1 + strlen(event->watches[0]->token) + 1;
- buffer = talloc_array(conn, char, len);
- strcpy(buffer, node);
- strcpy(buffer+strlen(node)+1, event->watches[0]->token);
- send_reply(conn, XS_WATCH_EVENT, buffer, len);
- talloc_free(buffer);
-}
-
-static struct watch **find_watches(const char *node, bool recurse,
- unsigned int *num)
-{
- struct watch *i;
- struct watch **ret = NULL;
-
- *num = 0;
-
- /* We include children too if this is an rm. */
- list_for_each_entry(i, &watches, list) {
- if (is_child(node, i->node) ||
- (recurse && is_child(i->node, node))) {
- (*num)++;
- ret = talloc_realloc(node, ret, struct watch *, *num);
- ret[*num - 1] = i;
- }
- }
- return ret;
+ event = talloc(watch, struct watch_event);
+ event->len = strlen(node) + 1 + strlen(watch->token) + 1;
+ event->data = talloc_array(event, char, event->len);
+ strcpy(event->data, node);
+ strcpy(event->data + strlen(node) + 1, watch->token);
+ talloc_set_destructor(event, destroy_watch_event);
+ list_add_tail(&event->list, &watch->events);
+ trace_create(event, "watch_event");
}
/* FIXME: we fail to fire on out of memory. Should drop connections. */
-void fire_watches(struct transaction *trans, const char *node, bool recurse)
-{
- struct watch **watches;
- struct watch_event *event;
- unsigned int num_watches;
+void fire_watches(struct connection *conn, const char *node, bool recurse)
+{
+ struct connection *i;
+ struct watch *watch;
/* During transactions, don't fire watches. */
- if (trans)
- return;
-
- watches = find_watches(node, recurse, &num_watches);
- if (!watches)
- return;
-
- /* Create and fill in info about event. */
- event = talloc(talloc_autofree_context(), struct watch_event);
- event->node = talloc_strdup(event, node);
-
- /* Tie event to this watch. */
- event->watches = watches;
- talloc_steal(event, watches);
- event->num_watches = num_watches;
- event->recurse = recurse;
- list_add_tail(&event->list, &watches[0]->events);
-
- /* Warn if not finished after thirty seconds. */
- gettimeofday(&event->timeout, NULL);
- event->timeout.tv_sec += 30;
-
- /* If connection not doing anything, queue this. */
- if (!watches[0]->conn->out)
- queue_next_event(watches[0]->conn);
-}
-
-/* We're done with this event: see if anyone else wants it. */
-static void move_event_onwards(struct watch_event *event)
-{
- list_del(&event->list);
-
- event->num_watches--;
- event->watches++;
- if (!event->num_watches) {
- talloc_free(event);
- return;
- }
-
- list_add_tail(&event->list, &event->watches[0]->events);
-
- /* If connection not doing anything, queue this. */
- if (!event->watches[0]->conn->out)
- queue_next_event(event->watches[0]->conn);
-}
-
-static void remove_watch_from_events(struct watch *dying_watch)
-{
- struct watch *watch;
- struct watch_event *event;
- unsigned int i;
-
- list_for_each_entry(watch, &watches, list) {
- list_for_each_entry(event, &watch->events, list) {
- for (i = 0; i < event->num_watches; i++) {
- if (event->watches[i] != dying_watch)
- continue;
-
- assert(i != 0);
- memmove(event->watches+i,
- event->watches+i+1,
- (event->num_watches - (i+1))
- * sizeof(struct watch *));
- event->num_watches--;
- }
+ if (conn->transaction)
+ return;
+
+ /* Create an event for each watch. Don't send to self. */
+ list_for_each_entry(i, &connections, list) {
+ if (i == conn)
+ continue;
+
+ list_for_each_entry(watch, &i->watches, list) {
+ if (is_child(node, watch->node))
+ add_event(watch, node);
+ else if (recurse && is_child(watch->node, node))
+ add_event(watch, watch->node);
+ else
+ continue;
+ /* If connection not doing anything, queue this. */
+ if (!i->out)
+ queue_next_event(i);
}
}
}
static int destroy_watch(void *_watch)
{
- struct watch *watch = _watch;
- struct watch_event *event;
-
- /* If we have pending events, pass them on to others. */
- while ((event = list_top(&watch->events, struct watch_event, list)))
- move_event_onwards(event);
-
- /* Remove from global list. */
- list_del(&watch->list);
-
- /* Other events which match this watch must be cleared. */
- remove_watch_from_events(watch);
-
- trace_destroy(watch, "watch");
+ trace_destroy(_watch, "watch");
return 0;
}
-/* We keep watches in priority order. */
-static void insert_watch(struct watch *watch)
-{
- struct watch *i;
-
- list_for_each_entry(i, &watches, list) {
- if (i->priority <= watch->priority) {
- list_add_tail(&watch->list, &i->list);
- return;
- }
- }
-
- list_add_tail(&watch->list, &watches);
-}
-
void shortest_watch_ack_timeout(struct timeval *tv)
{
+ (void)tv;
+#if 0 /* FIXME */
struct watch *watch;
list_for_each_entry(watch, &watches, list) {
@@ -285,10 +165,12 @@
*tv = i->timeout;
}
}
+#endif
}
void check_watch_ack_timeout(void)
{
+#if 0
struct watch *watch;
struct timeval now;
@@ -308,77 +190,97 @@
}
}
}
-}
-
-bool do_watch(struct connection *conn, struct buffered_data *in)
-{
- struct watch *watch;
- char *vec[3];
+#endif
+}
+
+void do_watch(struct connection *conn, struct buffered_data *in)
+{
+ struct watch *watch;
+ char *vec[2];
bool relative;
- if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec))
- return send_error(conn, EINVAL);
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec)) {
+ send_error(conn, EINVAL);
+ return;
+ }
relative = !strstarts(vec[0], "/");
vec[0] = canonicalize(conn, vec[0]);
- if (!check_node_perms(conn, vec[0], XS_PERM_READ))
- return send_error(conn, errno);
+ if (!check_node_perms(conn, vec[0], XS_PERM_READ)) {
+ send_error(conn, errno);
+ return;
+ }
watch = talloc(conn, struct watch);
watch->node = talloc_strdup(watch, vec[0]);
watch->token = talloc_strdup(watch, vec[1]);
- watch->conn = conn;
- watch->priority = strtoul(vec[2], NULL, 0);
- watch->relative = relative;
+ if (relative)
+ watch->relative_path = get_implicit_path(conn);
+ else
+ watch->relative_path = NULL;
+
INIT_LIST_HEAD(&watch->events);
- insert_watch(watch);
+ list_add_tail(&watch->list, &conn->watches);
+ trace_create(watch, "watch");
talloc_set_destructor(watch, destroy_watch);
- trace_create(watch, "watch");
- return send_ack(conn, XS_WATCH);
-}
-
-bool do_watch_ack(struct connection *conn, const char *token)
+ send_ack(conn, XS_WATCH);
+}
+
+void do_watch_ack(struct connection *conn, const char *token)
{
struct watch_event *event;
- if (!token)
- return send_error(conn, EINVAL);
-
- if (!conn->waiting_for_ack)
- return send_error(conn, ENOENT);
-
- event = get_first_event(conn);
- if (!streq(event->watches[0]->token, token))
- return send_error(conn, EINVAL);
-
- move_event_onwards(event);
- conn->waiting_for_ack = false;
- return send_ack(conn, XS_WATCH_ACK);
-}
-
-bool do_unwatch(struct connection *conn, struct buffered_data *in)
+ if (!token) {
+ send_error(conn, EINVAL);
+ return;
+ }
+
+ if (!conn->waiting_for_ack) {
+ send_error(conn, ENOENT);
+ return;
+ }
+
+ if (!streq(conn->waiting_for_ack->token, token)) {
+ /* They're confused: this will cause us to send event again */
+ conn->waiting_for_ack = NULL;
+ send_error(conn, EINVAL);
+ return;
+ }
+
+ /* Remove event: after ack sent, core will call queue_next_event */
+ event = list_top(&conn->waiting_for_ack->events, struct watch_event,
+ list);
+ list_del(&event->list);
+ talloc_free(event);
+
+ conn->waiting_for_ack = NULL;
+ send_ack(conn, XS_WATCH_ACK);
+}
+
+void do_unwatch(struct connection *conn, struct buffered_data *in)
{
struct watch *watch;
char *node, *vec[2];
- if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec))
- return send_error(conn, EINVAL);
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec)) {
+ send_error(conn, EINVAL);
+ return;
+ }
/* We don't need to worry if we're waiting for an ack for the
* watch we're deleting: conn->waiting_for_ack was reset by
* this command in consider_message anyway. */
node = canonicalize(conn, vec[0]);
- list_for_each_entry(watch, &watches, list) {
- if (watch->conn != conn)
- continue;
-
+ list_for_each_entry(watch, &conn->watches, list) {
if (streq(watch->node, node) && streq(watch->token, vec[1])) {
+ list_del(&watch->list);
talloc_free(watch);
- return send_ack(conn, XS_UNWATCH);
- }
- }
- return send_error(conn, ENOENT);
+ send_ack(conn, XS_UNWATCH);
+ return;
+ }
+ }
+ send_error(conn, ENOENT);
}
#ifdef TESTING
@@ -387,15 +289,16 @@
struct watch *watch;
struct watch_event *event;
- /* Find first watch with an event. */
- list_for_each_entry(watch, &watches, list) {
- if (watch->conn != conn)
- continue;
-
- printf(" watch on %s token %s prio %i\n",
- watch->node, watch->token, watch->priority);
+ if (conn->waiting_for_ack)
+ printf(" waiting_for_ack for watch on %s token %s\n",
+ conn->waiting_for_ack->node,
+ conn->waiting_for_ack->token);
+
+ list_for_each_entry(watch, &conn->watches, list) {
+ printf(" watch on %s token %s\n",
+ watch->node, watch->token);
list_for_each_entry(event, &watch->events, list)
- printf(" event: %s\n", event->node);
+ printf(" event: %s\n", event->data);
}
}
#endif
diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_watch.h
--- a/tools/xenstore/xenstored_watch.h Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xenstored_watch.h Fri Jul 29 20:25:03 2005
@@ -22,9 +22,9 @@
#include "xenstored_core.h"
-bool do_watch(struct connection *conn, struct buffered_data *in);
-bool do_watch_ack(struct connection *conn, const char *token);
-bool do_unwatch(struct connection *conn, struct buffered_data *in);
+void do_watch(struct connection *conn, struct buffered_data *in);
+void do_watch_ack(struct connection *conn, const char *token);
+void do_unwatch(struct connection *conn, struct buffered_data *in);
/* Is this a watch event message for this connection? */
bool is_watch_event(struct connection *conn, struct buffered_data *out);
@@ -32,8 +32,9 @@
/* Look through our watches: if any of them have an event, queue it. */
void queue_next_event(struct connection *conn);
-/* Fire all watches: recurse means all the children are effected (ie. rm) */
-void fire_watches(struct transaction *trans, const char *node, bool recurse);
+/* Fire all watches: recurse means all the children are effected (ie. rm).
+ */
+void fire_watches(struct connection *conn, const char *node, bool recurse);
/* Find shortest timeout: if any, reduce tv (may already be set). */
void shortest_watch_ack_timeout(struct timeval *tv);
diff -r a4196568095c -r b53a65034532 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xs.c Fri Jul 29 20:25:03 2005
@@ -401,22 +401,16 @@
/* Watch a node for changes (poll on fd to detect, or call read_watch()).
* When the node (or any child) changes, fd will become readable.
* Token is returned when watch is read, to allow matching.
- * Priority indicates order if multiple watchers: higher is first.
* Returns false on failure.
*/
-bool xs_watch(struct xs_handle *h, const char *path, const char *token,
- unsigned int priority)
-{
- char prio[MAX_STRLEN(priority)];
- struct iovec iov[3];
-
- sprintf(prio, "%u", priority);
+bool xs_watch(struct xs_handle *h, const char *path, const char *token)
+{
+ struct iovec iov[2];
+
iov[0].iov_base = (void *)path;
iov[0].iov_len = strlen(path) + 1;
iov[1].iov_base = (void *)token;
iov[1].iov_len = strlen(token) + 1;
- iov[2].iov_base = prio;
- iov[2].iov_len = strlen(prio) + 1;
return xs_bool(xs_talkv(h, XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
}
diff -r a4196568095c -r b53a65034532 tools/xenstore/xs.h
--- a/tools/xenstore/xs.h Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xs.h Fri Jul 29 20:25:03 2005
@@ -82,11 +82,9 @@
/* Watch a node for changes (poll on fd to detect, or call read_watch()).
* When the node (or any child) changes, fd will become readable.
* Token is returned when watch is read, to allow matching.
- * Priority indicates order if multiple watchers: higher is first.
* Returns false on failure.
*/
-bool xs_watch(struct xs_handle *h, const char *path, const char *token,
- unsigned int priority);
+bool xs_watch(struct xs_handle *h, const char *path, const char *token);
/* Return the FD to poll on to see if a watch has fired. */
int xs_fileno(struct xs_handle *h);
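A minimal client sketch of the reworked two-argument interface (illustrative only: the node name, token and error handling are hypothetical, and only calls that already appear elsewhere in this changeset are used):

    #include <sys/select.h>
    #include "xs.h"

    int main(void)
    {
        struct xs_handle *h = xs_daemon_open();
        fd_set set;
        int fd;

        if (!h)
            return 1;

        /* xs_watch() now takes just a path and a caller-chosen token. */
        if (!xs_watch(h, "/mynode", "mytoken"))
            return 1;

        /* Block until the watch event arrives on the daemon connection. */
        fd = xs_fileno(h);
        FD_ZERO(&set);
        FD_SET(fd, &set);
        if (select(fd + 1, &set, NULL, NULL, NULL) > 0) {
            /* The queued "node\0token" event would be read here, then
             * acknowledged so the daemon releases the next event. */
            xs_acknowledge_watch(h, "mytoken");
        }

        xs_daemon_close(h);
        return 0;
    }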
diff -r a4196568095c -r b53a65034532 tools/xenstore/xs_random.c
--- a/tools/xenstore/xs_random.c Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xs_random.c Fri Jul 29 20:25:03 2005
@@ -987,6 +987,8 @@
char *cmd = talloc_asprintf(NULL, "echo -n r0 > %s/.perms", dir);
if (mkdir(dir, 0700) != 0)
barf_perror("Creating directory %s", dir);
+ if (mkdir(talloc_asprintf(cmd, "%s/tool", dir), 0700) != 0)
+ barf_perror("Creating directory %s/tool", dir);
do_command(cmd);
talloc_free(cmd);
}
@@ -1211,6 +1213,10 @@
char *nodename;
bool ret = false;
+ /* Ignore tool/ dir. */
+ if (streq(node, "/tool"))
+ return true;
+
/* FILE backend expects talloc'ed pointer. */
nodename = talloc_strdup(NULL, node);
permsa = a->get_perms(ah, nodename, &numpermsa);
diff -r a4196568095c -r b53a65034532 tools/xenstore/xs_test.c
--- a/tools/xenstore/xs_test.c Fri Jul 29 18:52:33 2005
+++ b/tools/xenstore/xs_test.c Fri Jul 29 20:25:03 2005
@@ -20,6 +20,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>
@@ -33,6 +34,10 @@
#define XSTEST
static struct xs_handle *handles[10] = { NULL };
+static unsigned int children;
+
+static bool timeout = true;
+static bool readonly = false;
struct ringbuf_head
{
@@ -80,6 +85,14 @@
return buf + h->read;
}
+static int output_avail(struct ringbuf_head *out)
+{
+ unsigned int avail;
+
+ get_output_chunk(out, out->buf, &avail);
+ return avail != 0;
+}
+
static void update_output_chunk(struct ringbuf_head *h, uint32_t len)
{
h->write += len;
@@ -99,10 +112,12 @@
void *data, unsigned int len)
{
unsigned int avail;
+ int was_full;
if (!check_buffer(in))
barf("Corrupt buffer");
+ was_full = !output_avail(in);
while (len) {
const void *src = get_input_chunk(in, in->buf, &avail);
if (avail > len)
@@ -114,7 +129,8 @@
}
/* Tell other end we read something. */
- kill(daemon_pid, SIGUSR2);
+ if (was_full)
+ kill(daemon_pid, SIGUSR2);
return true;
}
@@ -173,7 +189,9 @@
" getperm <path>\n"
" setperm <path> <id> <flags> ...\n"
" shutdown\n"
- " watch <path> <token> <prio>\n"
+ " watch <path> <token>\n"
+ " async <command>...\n"
+ " asyncwait\n"
" waitwatch\n"
" ackwatch <token>\n"
" unwatch <path> <token>\n"
@@ -186,22 +204,34 @@
" dump\n");
}
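+
+/*
+ * Return the byte offset of whitespace-separated argument <num> within
+ * <line>, e.g. (illustrative) argpos("read /foo", 1) == 5, the offset of
+ * "/foo".  Used by arg() below and by do_async() to strip the leading
+ * "async" word from a command line.
+ */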
+static int argpos(const char *line, unsigned int num)
+{
+ unsigned int i, len = 0, off = 0;
+
+ for (i = 0; i <= num; i++) {
+ off += len;
+ off += strspn(line + off, " \t\n");
+ len = strcspn(line + off, " \t\n");
+ if (!len)
+ return off;
+ }
+ return off;
+}
+
static char *arg(char *line, unsigned int num)
{
static char *args[10];
- unsigned int i, len = 0;
-
- for (i = 0; i <= num; i++) {
- line += len;
- line += strspn(line, " \t\n");
- len = strcspn(line, " \t\n");
- if (!len)
- barf("Can't get arg %u", num);
- }
+ unsigned int off, len;
+
+ off = argpos(line, num);
+ len = strcspn(line + off, " \t\n");
+
+ if (!len)
+ barf("Can't get arg %u", num);
free(args[num]);
args[num] = malloc(len + 1);
- memcpy(args[num], line, len);
+ memcpy(args[num], line+off, len);
args[num][len] = '\0';
return args[num];
}
@@ -360,10 +390,9 @@
failed(handle);
}
-static void do_watch(unsigned int handle, const char *node, const char *token,
- const char *pri)
-{
- if (!xs_watch(handles[handle], node, token, atoi(pri)))
+static void do_watch(unsigned int handle, const char *node, const char *token)
+{
+ if (!xs_watch(handles[handle], node, token))
failed(handle);
}
@@ -386,6 +415,82 @@
{
if (!xs_acknowledge_watch(handles[handle], token))
failed(handle);
+}
+
+static bool wait_for_input(unsigned int handle)
+{
+ unsigned int i;
+ for (i = 0; i < ARRAY_SIZE(handles); i++) {
+ int fd;
+
+ if (!handles[i] || i == handle)
+ continue;
+
+ fd = xs_fileno(handles[i]);
+ if (fd == -2) {
+ unsigned int avail;
+ get_input_chunk(in, in->buf, &avail);
+ if (avail != 0)
+ return true;
+ } else {
+ struct timeval tv = {.tv_sec = 0, .tv_usec = 0 };
+ fd_set set;
+
+ FD_ZERO(&set);
+ FD_SET(fd, &set);
+ if (select(fd+1, &set, NULL, NULL,&tv))
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/* Async wait for watch on handle */
+static void do_command(unsigned int default_handle, char *line);
+static void do_async(unsigned int handle, char *line)
+{
+ int child;
+ unsigned int i;
+ children++;
+ if ((child = fork()) != 0) {
+ /* Wait until *something* happens, which indicates
+ * child has created an event. V. sloppy, but we can't
+ * select on fake domain connections.
+ */
+ while (!wait_for_input(handle));
+ return;
+ }
+
+ /* Don't keep other handles open in parent. */
+ for (i = 0; i < ARRAY_SIZE(handles); i++) {
+ if (handles[i] && i != handle) {
+ xs_daemon_close(handles[i]);
+ handles[i] = NULL;
+ }
+ }
+
+ do_command(handle, line + argpos(line, 1));
+ exit(0);
+}
+
+static void do_asyncwait(unsigned int handle)
+{
+ int status;
+
+ if (handle)
+ barf("handle has no meaning with asyncwait");
+
+ if (children == 0)
+ barf("No children to wait for!");
+
+ if (waitpid(0, &status, 0) > 0) {
+ if (!WIFEXITED(status))
+ barf("async died");
+ if (WEXITSTATUS(status))
+ exit(WEXITSTATUS(status));
+ }
+ children--;
}
static void do_unwatch(unsigned int handle, const char *node, const char *token)
@@ -533,23 +638,106 @@
free(subdirs);
}
+static int handle;
+
+static void alarmed(int sig __attribute__((unused)))
+{
+ if (handle) {
+ char handlename[10];
+ sprintf(handlename, "%u:", handle);
+ write(STDOUT_FILENO, handlename, strlen(handlename));
+ }
+ write(STDOUT_FILENO, command, strlen(command));
+ write(STDOUT_FILENO, " timeout\n", strlen(" timeout\n"));
+ exit(1);
+}
+
+static void do_command(unsigned int default_handle, char *line)
+{
+ char *endp;
+
+ if (strspn(line, " \n") == strlen(line))
+ return;
+ if (strstarts(line, "#"))
+ return;
+
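+ /* An optional leading number selects which connection handle to use,
+ * e.g. "2 read /foo"; otherwise default_handle applies (0 from main(),
+ * or the parent's handle inside an async child). */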
+ handle = strtoul(line, &endp, 10);
+ if (endp != line)
+ memmove(line, endp+1, strlen(endp));
+ else
+ handle = default_handle;
+
+ if (!handles[handle]) {
+ if (readonly)
+ handles[handle] = xs_daemon_open_readonly();
+ else
+ handles[handle] = xs_daemon_open();
+ if (!handles[handle])
+ barf_perror("Opening connection to daemon");
+ }
+ command = arg(line, 0);
+
+ if (timeout)
+ alarm(1);
+
+ if (streq(command, "dir"))
+ do_dir(handle, arg(line, 1));
+ else if (streq(command, "read"))
+ do_read(handle, arg(line, 1));
+ else if (streq(command, "write"))
+ do_write(handle,
+ arg(line, 1), arg(line, 2), arg(line, 3));
+ else if (streq(command, "setid"))
+ do_setid(handle, arg(line, 1));
+ else if (streq(command, "mkdir"))
+ do_mkdir(handle, arg(line, 1));
+ else if (streq(command, "rm"))
+ do_rm(handle, arg(line, 1));
+ else if (streq(command, "getperm"))
+ do_getperm(handle, arg(line, 1));
+ else if (streq(command, "setperm"))
+ do_setperm(handle, arg(line, 1), line);
+ else if (streq(command, "shutdown"))
+ do_shutdown(handle);
+ else if (streq(command, "watch"))
+ do_watch(handle, arg(line, 1), arg(line, 2));
+ else if (streq(command, "waitwatch"))
+ do_waitwatch(handle);
+ else if (streq(command, "async"))
+ do_async(handle, line);
+ else if (streq(command, "asyncwait"))
+ do_asyncwait(handle);
+ else if (streq(command, "ackwatch"))
+ do_ackwatch(handle, arg(line, 1));
+ else if (streq(command, "unwatch"))
+ do_unwatch(handle, arg(line, 1), arg(line, 2));
+ else if (streq(command, "close")) {
+ xs_daemon_close(handles[handle]);
+ handles[handle] = NULL;
+ } else if (streq(command, "start"))
+ do_start(handle, arg(line, 1));
+ else if (streq(command, "commit"))
+ do_end(handle, false);
+ else if (streq(command, "abort"))
+ do_end(handle, true);
+ else if (streq(command, "introduce"))
+ do_introduce(handle, arg(line, 1), arg(line, 2),
+ arg(line, 3), arg(line, 4));
+ else if (streq(command, "release"))
+ do_release(handle, arg(line, 1));
+ else if (streq(command, "dump"))
+ dump(handle);
+ else if (streq(command, "sleep"))
+ sleep(atoi(arg(line, 1)));
+ else
+ barf("Unknown command %s", command);
+ fflush(stdout);
+ alarm(0);
+}
+
int main(int argc, char *argv[])
{
char line[1024];
- bool readonly = false, timeout = true;
- int handle;
-
- static void alarmed(int sig __attribute__((unused)))
- {
- if (handle) {
- char handlename[10];
- sprintf(handlename, "%u:", handle);
- write(STDOUT_FILENO, handlename, strlen(handlename));
- }
- write(STDOUT_FILENO, command, strlen(command));
- write(STDOUT_FILENO, " timeout\n", strlen(" timeout\n"));
- exit(1);
- }
if (argc > 1 && streq(argv[1], "--readonly")) {
readonly = true;
@@ -557,7 +745,7 @@
argv++;
}
- if (argc > 1 && streq(argv[1], "--notimeout")) {
+ if (argc > 1 && streq(argv[1], "--no-timeout")) {
timeout = false;
argc--;
argv++;
@@ -570,81 +758,10 @@
ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
signal(SIGALRM, alarmed);
- while (fgets(line, sizeof(line), stdin)) {
- char *endp;
-
- if (strspn(line, " \n") == strlen(line))
- continue;
- if (strstarts(line, "#"))
- continue;
-
- handle = strtoul(line, &endp, 10);
- if (endp != line)
- memmove(line, endp+1, strlen(endp));
- else
- handle = 0;
-
- if (!handles[handle]) {
- if (readonly)
- handles[handle] = xs_daemon_open_readonly();
- else
- handles[handle] = xs_daemon_open();
- if (!handles[handle])
- barf_perror("Opening connection to daemon");
- }
- command = arg(line, 0);
-
- if (timeout)
- alarm(5);
- if (streq(command, "dir"))
- do_dir(handle, arg(line, 1));
- else if (streq(command, "read"))
- do_read(handle, arg(line, 1));
- else if (streq(command, "write"))
- do_write(handle,
- arg(line, 1), arg(line, 2), arg(line, 3));
- else if (streq(command, "setid"))
- do_setid(handle, arg(line, 1));
- else if (streq(command, "mkdir"))
- do_mkdir(handle, arg(line, 1));
- else if (streq(command, "rm"))
- do_rm(handle, arg(line, 1));
- else if (streq(command, "getperm"))
- do_getperm(handle, arg(line, 1));
- else if (streq(command, "setperm"))
- do_setperm(handle, arg(line, 1), line);
- else if (streq(command, "shutdown"))
- do_shutdown(handle);
- else if (streq(command, "watch"))
- do_watch(handle, arg(line, 1), arg(line, 2), arg(line, 3));
- else if (streq(command, "waitwatch"))
- do_waitwatch(handle);
- else if (streq(command, "ackwatch"))
- do_ackwatch(handle, arg(line, 1));
- else if (streq(command, "unwatch"))
- do_unwatch(handle, arg(line, 1), arg(line, 2));
- else if (streq(command, "close")) {
- xs_daemon_close(handles[handle]);
- handles[handle] = NULL;
- } else if (streq(command, "start"))
- do_start(handle, arg(line, 1));
- else if (streq(command, "commit"))
- do_end(handle, false);
- else if (streq(command, "abort"))
- do_end(handle, true);
- else if (streq(command, "introduce"))
- do_introduce(handle, arg(line, 1), arg(line, 2),
- arg(line, 3), arg(line, 4));
- else if (streq(command, "release"))
- do_release(handle, arg(line, 1));
- else if (streq(command, "dump"))
- dump(handle);
- else if (streq(command, "sleep"))
- sleep(atoi(arg(line, 1)));
- else
- barf("Unknown command %s", command);
- fflush(stdout);
- alarm(0);
- }
+ while (fgets(line, sizeof(line), stdin))
+ do_command(0, line);
+
+ while (children)
+ do_asyncwait(0);
return 0;
}
diff -r a4196568095c -r b53a65034532 xen/Makefile
--- a/xen/Makefile Fri Jul 29 18:52:33 2005
+++ b/xen/Makefile Fri Jul 29 20:25:03 2005
@@ -50,10 +50,10 @@
$(MAKE) -C arch/$(TARGET_ARCH) clean
rm -f include/asm *.o $(TARGET)* *~ core
rm -f include/asm-*/asm-offsets.h
- rm -f tools/figlet/*.o tools/figlet/figlet
rm -f include/xen/acm_policy.h
$(TARGET): delete-unfresh-files
+ $(MAKE) -C tools
$(MAKE) include/xen/compile.h
$(MAKE) include/xen/acm_policy.h
[ -e include/asm ] || ln -sf asm-$(TARGET_ARCH) include/asm
@@ -71,7 +71,6 @@
delete-unfresh-files:
@if [ ! -r include/xen/compile.h -o -O include/xen/compile.h ]; then \
rm -f include/xen/{banner,compile}.h; \
- $(MAKE) -C arch/$(TARGET_ARCH) delete-unfresh-files; \
fi
# acm_policy.h contains security policy for Xen
@@ -96,21 +95,16 @@
-e 's/@@whoami@@/$(shell whoami)/g' \
-e 's/@@domain@@/$(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown]))/g' \
-e 's/@@hostname@@/$(shell hostname)/g' \
- -e 's/@@compiler@@/$(shell $(CC) $(CFLAGS) -v 2>&1 | tail -1)/g' \
+ -e 's/@@compiler@@/$(shell $(CC) $(CFLAGS) -v 2>&1 | tail -n 1)/g' \
-e 's/@@version@@/$(XEN_VERSION)/g' \
-e 's/@@subversion@@/$(XEN_SUBVERSION)/g' \
-e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \
- -e 's!@@changeset@@!$(shell (hg parents | awk -F: '/^changeset/{CS=$$3};{FS="date:[ ]+"}/^date/{D=$$2}; END {print D, CS}') 2>/dev/null || (head -6 ChangeLog | awk -F: '/^changeset/{CS=$$3};{FS="date:[ ]+"}/^date/{D=$$2}; END {print D, CS}') 2>/dev/null || echo information unavailable)!g' \
+ -e 's!@@changeset@@!$(shell (hg parents | awk -F: '/^changeset/{CS=$$3};{FS="date:[ ]+"}/^date/{D=$$2}; END {print D, CS}') 2>/dev/null || (head -n 6 ChangeLog | awk -F: '/^changeset/{CS=$$3};{FS="date:[ ]+"}/^date/{D=$$2}; END {print D, CS}') 2>/dev/null || echo information unavailable)!g' \
< include/xen/compile.h.in > $@.new
@cat include/xen/banner.h >> $@.new
@mv -f $@.new $@
-tools/figlet/figlet: tools/figlet/figlet.o
- $(HOSTCC) -o $@ $<
-tools/figlet/figlet.o: tools/figlet/figlet.c
- $(HOSTCC) -o $@ -c $<
-
-include/xen/banner.h: tools/figlet/figlet tools/figlet/xen.flf
+include/xen/banner.h:
tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) > $@.new
@mv -f $@.new $@
@@ -147,4 +141,4 @@
$(all_sources) > cscope.files
cscope -k -b -q
MAP:
- nm $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map
+ $(NM) $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map
diff -r a4196568095c -r b53a65034532 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c Fri Jul 29 18:52:33 2005
+++ b/xen/acm/acm_core.c Fri Jul 29 20:25:03 2005
@@ -5,6 +5,9 @@
*
* Author:
* Reiner Sailer <sailer@xxxxxxxxxxxxxx>
+ *
+ * Contributors:
+ * Stefan Berger <stefanb@xxxxxxxxxxxxxx>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -25,6 +28,7 @@
#include <xen/lib.h>
#include <xen/delay.h>
#include <xen/sched.h>
+#include <xen/multiboot.h>
#include <acm/acm_hooks.h>
#include <acm/acm_endian.h>
@@ -81,9 +85,68 @@
acm_bin_pol.secondary_binary_policy = secondary;
}
+static int
+acm_setup(unsigned int *initrdidx,
+ const multiboot_info_t *mbi,
+ unsigned long initial_images_start)
+{
+ int i;
+ module_t *mod = (module_t *)__va(mbi->mods_addr);
+ int rc = ACM_OK;
+
+ if (mbi->mods_count > 1)
+ *initrdidx = 1;
+
+ /*
+ * Try all modules and see whichever could be the binary policy.
+ * Adjust the initrdidx if module[1] is the binary policy.
+ */
+ for (i = mbi->mods_count-1; i >= 1; i--) {
+ struct acm_policy_buffer *pol;
+ char *_policy_start;
+ unsigned long _policy_len;
+#if defined(__i386__)
+ _policy_start = (char *)(initial_images_start + (mod[i].mod_start-mod[0].mod_start));
+#elif defined(__x86_64__)
+ _policy_start = __va(initial_images_start + (mod[i].mod_start-mod[0].mod_start));
+#else
+#error Architecture unsupported by sHype
+#endif
+ _policy_len = mod[i].mod_end - mod[i].mod_start;
+ if (_policy_len < sizeof(struct acm_policy_buffer))
+ continue; /* not a policy */
+
+ pol = (struct acm_policy_buffer *)_policy_start;
+ if (ntohl(pol->magic) == ACM_MAGIC) {
+ rc = acm_set_policy((void *)_policy_start,
+ (u16)_policy_len,
+ ACM_USE_SECURITY_POLICY,
+ 0);
+ if (rc == ACM_OK) {
+ printf("Policy len 0x%lx, start at
%p.\n",_policy_len,_policy_start);
+ if (i == 1) {
+ if (mbi->mods_count > 2) {
+ *initrdidx = 2;
+ } else {
+ *initrdidx = 0;
+ }
+ } else {
+ *initrdidx = 1;
+ }
+ break;
+ } else {
+ printk("Invalid policy. %d.th module line.\n", i+1);
+ }
+ } /* end if a binary policy definition, i.e., (ntohl(pol->magic) == ACM_MAGIC ) */
+ }
+ return rc;
+}
+
int
-acm_init(void)
+acm_init(unsigned int *initrdidx,
+ const multiboot_info_t *mbi,
+ unsigned long initial_images_start)
{
int ret = -EINVAL;
@@ -127,10 +190,12 @@
if (ret != ACM_OK)
return -EINVAL;
+ acm_setup(initrdidx, mbi, initial_images_start);
printk("%s: Enforcing Primary %s, Secondary %s.\n", __func__,
ACM_POLICY_NAME(acm_bin_pol.primary_policy_code),
ACM_POLICY_NAME(acm_bin_pol.secondary_policy_code));
- return ACM_OK;
-}
+ return ret;
+}
+
#endif
diff -r a4196568095c -r b53a65034532 xen/acm/acm_policy.c
--- a/xen/acm/acm_policy.c Fri Jul 29 18:52:33 2005
+++ b/xen/acm/acm_policy.c Fri Jul 29 20:25:03 2005
@@ -33,7 +33,7 @@
#include <acm/acm_endian.h>
int
-acm_set_policy(void *buf, u16 buf_size, u16 policy)
+acm_set_policy(void *buf, u16 buf_size, u16 policy, int isuserbuffer)
{
u8 *policy_buffer = NULL;
struct acm_policy_buffer *pol;
@@ -53,16 +53,21 @@
/* 1. copy buffer from domain */
if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL)
goto error_free;
- if (copy_from_user(policy_buffer, buf, buf_size)) {
- printk("%s: Error copying!\n",__func__);
- goto error_free;
+ if (isuserbuffer) {
+ if (copy_from_user(policy_buffer, buf, buf_size)) {
+ printk("%s: Error copying!\n",__func__);
+ goto error_free;
+ }
+ } else {
+ memcpy(policy_buffer, buf, buf_size);
}
/* 2. some sanity checking */
pol = (struct acm_policy_buffer *)policy_buffer;
if ((ntohl(pol->magic) != ACM_MAGIC) ||
(ntohs(pol->primary_policy_code) != acm_bin_pol.primary_policy_code) ||
- (ntohs(pol->secondary_policy_code) != acm_bin_pol.secondary_policy_code)) {
+ (ntohs(pol->secondary_policy_code) != acm_bin_pol.secondary_policy_code) ||
+ (ntohl(pol->policyversion) != POLICY_INTERFACE_VERSION)) {
printkd("%s: Wrong policy magics!\n", __func__);
goto error_free;
}
diff -r a4196568095c -r b53a65034532 xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile Fri Jul 29 18:52:33 2005
+++ b/xen/arch/ia64/Makefile Fri Jul 29 20:25:03 2005
@@ -82,9 +82,4 @@
rm -f asm-xsi-offsets.s $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h
rm -f lib/*.o
-# setup.o contains bits of compile.h so it must be blown away
-delete-unfresh-files:
- echo any unfresh-files to delete for ia64\?
-# rm -f setup.o
-
-.PHONY: default clean delete-unfresh-files
+.PHONY: default clean
diff -r a4196568095c -r b53a65034532 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/Makefile Fri Jul 29 20:25:03 2005
@@ -13,11 +13,18 @@
OBJS := $(subst cpu/cyrix.o,,$(OBJS))
OBJS := $(subst cpu/rise.o,,$(OBJS))
OBJS := $(subst cpu/transmeta.o,,$(OBJS))
-OBJS := $(subst shadow32.o,,$(OBJS))
-else
-OBJS := $(subst shadow.o,,$(OBJS))
-OBJS := $(subst shadow_public.o,,$(OBJS))
-OBJS := $(subst shadow_xxx.o,,$(OBJS))
+endif
+
+OBJS := $(patsubst shadow%.o,,$(OBJS)) # drop all
+ifeq ($(TARGET_SUBARCH),x86_64)
+ OBJS += shadow.o shadow_public.o # x86_64: new code
+endif
+ifeq ($(TARGET_SUBARCH),x86_32)
+ ifneq ($(pae),n)
+ OBJS += shadow.o shadow_public.o # x86_32p: new code
+ else
+ OBJS += shadow32.o # x86_32: old code
+ endif
endif
OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
@@ -37,6 +44,15 @@
$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(TARGET_SUBARCH)/xen.lds
$(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@
+ $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
+ $(MAKE) $(BASEDIR)/xen-syms.o
+ $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+ boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
+ $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
+ $(MAKE) $(BASEDIR)/xen-syms.o
+ $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+ boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
+ rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
$(CC) $(CFLAGS) -S -o $@ $<
@@ -53,7 +69,4 @@
rm -f genapic/*.o genapic/*~ genapic/core
rm -f cpu/*.o cpu/*~ cpu/core
-delete-unfresh-files:
- # nothing
-
-.PHONY: default clean delete-unfresh-files
+.PHONY: default clean
diff -r a4196568095c -r b53a65034532 xen/arch/x86/acpi/boot.c
--- a/xen/arch/x86/acpi/boot.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/acpi/boot.c Fri Jul 29 20:25:03 2005
@@ -382,7 +382,7 @@
return -1;
}
-#ifdef CONFIG_X86_64
+#if 0/*def CONFIG_X86_64*/
vxtime.hpet_address = hpet_tbl->addr.addrl |
((long) hpet_tbl->addr.addrh << 32);
diff -r a4196568095c -r b53a65034532 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/apic.c Fri Jul 29 20:25:03 2005
@@ -723,16 +723,8 @@
static void __init setup_APIC_timer(unsigned int clocks)
{
unsigned long flags;
-
local_irq_save(flags);
-
- /*
- * Wait for IRQ0's slice:
- */
- wait_timer_tick();
-
__setup_APIC_LVTT(clocks);
-
local_irq_restore(flags);
}
diff -r a4196568095c -r b53a65034532 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/domain.c Fri Jul 29 20:25:03 2005
@@ -373,6 +373,14 @@
out:
free_vmcs(vmcs);
+ if(v->arch.arch_vmx.io_bitmap_a != 0) {
+ free_xenheap_pages(v->arch.arch_vmx.io_bitmap_a, get_order(0x1000));
+ v->arch.arch_vmx.io_bitmap_a = 0;
+ }
+ if(v->arch.arch_vmx.io_bitmap_b != 0) {
+ free_xenheap_pages(v->arch.arch_vmx.io_bitmap_b, get_order(0x1000));
+ v->arch.arch_vmx.io_bitmap_b = 0;
+ }
v->arch.arch_vmx.vmcs = 0;
return error;
}
@@ -417,12 +425,12 @@
/* Ensure real hardware interrupts are enabled. */
v->arch.guest_context.user_regs.eflags |= EF_IE;
- } else {
- __vmwrite(GUEST_RFLAGS, v->arch.guest_context.user_regs.eflags);
- if (v->arch.guest_context.user_regs.eflags & EF_TF)
- __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
- else
- __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+ }
+ else if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+ {
+ return modify_vmcs(
+ &v->arch.arch_vmx,
+ &v->arch.guest_context.user_regs);
}
if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
@@ -926,6 +934,14 @@
BUG_ON(v->arch.arch_vmx.vmcs == NULL);
free_vmcs(v->arch.arch_vmx.vmcs);
+ if(v->arch.arch_vmx.io_bitmap_a != 0) {
+ free_xenheap_pages(v->arch.arch_vmx.io_bitmap_a, get_order(0x1000));
+ v->arch.arch_vmx.io_bitmap_a = 0;
+ }
+ if(v->arch.arch_vmx.io_bitmap_b != 0) {
+ free_xenheap_pages(v->arch.arch_vmx.io_bitmap_b, get_order(0x1000));
+ v->arch.arch_vmx.io_bitmap_b = 0;
+ }
v->arch.arch_vmx.vmcs = 0;
free_monitor_pagetable(v);
diff -r a4196568095c -r b53a65034532 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/domain_build.c Fri Jul 29 20:25:03 2005
@@ -63,7 +63,7 @@
unsigned int order = get_order(max * PAGE_SIZE);
if ( (max & (max-1)) != 0 )
order--;
- while ( (page = alloc_domheap_pages(d, order)) == NULL )
+ while ( (page = alloc_domheap_pages(d, order, 0)) == NULL )
if ( order-- == 0 )
break;
return page;
@@ -165,6 +165,8 @@
xen_pae ? "yes" : "no", dom0_pae ? "yes" : "no");
return -EINVAL;
}
+ if (strstr(dsi.xen_section_string, "SHADOW=translate"))
+ opt_dom0_translate = 1;
/* Align load address to 4MB boundary. */
dsi.v_start &= ~((1UL<<22)-1);
@@ -618,11 +620,13 @@
if ( opt_dom0_shadow || opt_dom0_translate )
{
+ printk("dom0: shadow enable\n");
shadow_mode_enable(d, (opt_dom0_translate
? SHM_enable | SHM_refcounts | SHM_translate
: SHM_enable));
if ( opt_dom0_translate )
{
+ printk("dom0: shadow translate\n");
#if defined(__i386__) && defined(CONFIG_X86_PAE)
printk("FIXME: PAE code needed here: %s:%d (%s)\n",
__FILE__, __LINE__, __FUNCTION__);
@@ -655,6 +659,7 @@
}
update_pagetables(v); /* XXX SMP */
+ printk("dom0: shadow setup done\n");
}
return 0;
diff -r a4196568095c -r b53a65034532 xen/arch/x86/genapic/es7000plat.c
--- a/xen/arch/x86/genapic/es7000plat.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/genapic/es7000plat.c Fri Jul 29 20:25:03 2005
@@ -136,7 +136,19 @@
es7000_plat = 0;
} else {
printk("\nEnabling ES7000 specific features...\n");
- es7000_plat = 1;
+ /*
+ * Determine the generation of the ES7000 currently running.
+ *
+ * es7000_plat = 0 if the machine is NOT a Unisys ES7000 box
+ * es7000_plat = 1 if the machine is a 5xx ES7000 box
+ * es7000_plat = 2 if the machine is a x86_64 ES7000 box
+ *
+ */
+ if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
+ es7000_plat = 2;
+ else
+ es7000_plat = 1;
+
ioapic_renumber_irq = es7000_rename_gsi;
}
return es7000_plat;
@@ -286,7 +298,7 @@
void __init
es7000_sw_apic()
{
- if (es7000_plat) {
+ if (es7000_plat == 1) {
int mip_status;
struct mip_reg es7000_mip_reg;
diff -r a4196568095c -r b53a65034532 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/i8259.c Fri Jul 29 20:25:03 2005
@@ -19,7 +19,7 @@
#include <asm/bitops.h>
#include <xen/delay.h>
#include <asm/apic.h>
-
+#include <io_ports.h>
/*
* Common place to define all x86 IRQ vectors
@@ -395,9 +395,9 @@
/* Set the clock to HZ Hz */
#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
- outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(LATCH & 0xff , 0x40); /* LSB */
- outb(LATCH >> 8 , 0x40); /* MSB */
+ outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
+ outb(LATCH >> 8, PIT_CH0); /* MSB */
setup_irq(2, &cascade);
}
diff -r a4196568095c -r b53a65034532 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/io_apic.c Fri Jul 29 20:25:03 2005
@@ -956,6 +956,13 @@
unsigned long flags;
/*
+ * Don't check I/O APIC IDs for xAPIC systems. They have
+ * no meaning without the serial APIC bus.
+ */
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15))
+ return;
+
+ /*
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
*/
@@ -981,10 +988,6 @@
mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
}
- /* Don't check I/O APIC IDs for some xAPIC systems. They have
- * no meaning without the serial APIC bus. */
- if (NO_IOAPIC_CHECK)
- continue;
/*
* Sanity check, is the ID really free? Every APIC in a
* system must have a unique ID or we get lots of nice
diff -r a4196568095c -r b53a65034532 xen/arch/x86/mpparse.c
--- a/xen/arch/x86/mpparse.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/mpparse.c Fri Jul 29 20:25:03 2005
@@ -913,7 +913,10 @@
mp_ioapics[idx].mpc_apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
- mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15))
+ mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+ else
+ mp_ioapics[idx].mpc_apicid = id;
mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
/*
@@ -995,9 +998,9 @@
Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
/*
- * ES7000 has no legacy identity mappings
- */
- if (es7000_plat)
+ * Older generations of ES7000 have no legacy identity mappings
+ */
+ if (es7000_plat == 1)
return;
/*
@@ -1053,11 +1056,20 @@
}
}
+#define MAX_GSI_NUM 4096
+
int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
{
int ioapic = -1;
int ioapic_pin = 0;
int idx, bit = 0;
+ static int pci_irq = 16;
+ /*
+ * Mapping between Global System Interrupts, which
+ * represent all possible interrupts, and IRQs
+ * assigned to actual devices.
+ */
+ static int gsi_to_irq[MAX_GSI_NUM];
#ifdef CONFIG_ACPI_BUS
/* Don't set up the ACPI SCI because it's already set up */
@@ -1092,10 +1104,25 @@
if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
- return gsi;
+ return gsi_to_irq[gsi];
}
mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+ if (edge_level) {
+ /*
+ * For PCI devices assign IRQs in order, avoiding gaps
+ * due to unused I/O APIC pins.
+ */
+ int irq = gsi;
+ if (gsi < MAX_GSI_NUM) {
+ gsi = pci_irq++;
+ gsi_to_irq[irq] = gsi;
+ } else {
+ printk(KERN_ERR "GSI %u is too high\n", gsi);
+ return gsi;
+ }
+ }
io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
diff -r a4196568095c -r b53a65034532 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/setup.c Fri Jul 29 20:25:03 2005
@@ -197,7 +197,12 @@
set_in_cr4(X86_CR4_OSXMMEXCPT);
if ( opt_nosmp )
+ {
max_cpus = 0;
+ smp_num_siblings = 1;
+ boot_cpu_data.x86_num_cores = 1;
+ }
+
smp_prepare_cpus(max_cpus);
/* We aren't hotplug-capable yet. */
@@ -245,6 +250,8 @@
module_t *mod = (module_t *)__va(mbi->mods_addr);
unsigned long firsthole_start, nr_pages;
unsigned long initial_images_start, initial_images_end;
+ unsigned long _initrd_start = 0, _initrd_len = 0;
+ unsigned int initrdidx = 1;
struct e820entry e820_raw[E820MAX];
int i, e820_raw_nr = 0, bytes = 0;
struct ns16550_defaults ns16550 = {
@@ -411,7 +418,7 @@
shadow_mode_init();
/* initialize access control security module */
- acm_init();
+ acm_init(&initrdidx, mbi, initial_images_start);
/* Create initial domain 0. */
dom0 = do_createdomain(0, 0);
@@ -450,6 +457,13 @@
}
}
+ if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
+ {
+ _initrd_start = initial_images_start +
+ (mod[initrdidx].mod_start - mod[0].mod_start);
+ _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
+ }
+
/*
* We're going to setup domain0 using the module(s) that we stashed safely
* above our heap. The second module, if present, is an initrd ramdisk.
@@ -457,11 +471,8 @@
if ( construct_dom0(dom0,
initial_images_start,
mod[0].mod_end-mod[0].mod_start,
- (mbi->mods_count == 1) ? 0 :
- initial_images_start +
- (mod[1].mod_start-mod[0].mod_start),
- (mbi->mods_count == 1) ? 0 :
- mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
+ _initrd_start,
+ _initrd_len,
cmdline) != 0)
panic("Could not set up DOM0 guest OS\n");
diff -r a4196568095c -r b53a65034532 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/shadow.c Fri Jul 29 20:25:03 2005
@@ -41,7 +41,13 @@
static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
#endif
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS == 3
+#include <asm/shadow_64.h>
+static unsigned long shadow_l3_table(
+ struct domain *d, unsigned long gpfn, unsigned long gmfn);
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
#include <asm/shadow_64.h>
static unsigned long shadow_l4_table(
struct domain *d, unsigned long gpfn, unsigned long gmfn);
@@ -1069,6 +1075,11 @@
int is_l1_shadow =
((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
PGT_l1_shadow);
+#if CONFIG_PAGING_LEVELS == 4
+ is_l1_shadow |=
+ ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
+ PGT_fl1_shadow);
+#endif
match = l1e_from_pfn(readonly_gmfn, flags);
@@ -1684,7 +1695,7 @@
if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
{
- if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) )
+ if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) )
{
allow_writes = 1;
l1e_add_flags(gpte, _PAGE_RW);
@@ -1833,7 +1844,7 @@
unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
unsigned long smfn, old_smfn;
-#if defined (__i386__)
+#if CONFIG_PAGING_LEVELS == 2
unsigned long hl2mfn;
#endif
@@ -1890,7 +1901,7 @@
v->arch.shadow_vtable = map_domain_page(smfn);
}
-#if defined (__i386__)
+#if CONFIG_PAGING_LEVELS == 2
/*
* arch.hl2_vtable
*/
@@ -1935,6 +1946,10 @@
// XXX - maybe this can be optimized somewhat??
local_flush_tlb();
}
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3
+ /* FIXME: PAE code to be written */
#endif
}
@@ -2427,6 +2442,7 @@
struct domain *d, unsigned long gpfn, unsigned long gmfn)
{
BUG(); /* not implemented yet */
+ return 42;
}
#endif
@@ -2581,7 +2597,7 @@
* shadow_set_lxe should be put in shadow.h
*/
static void shadow_set_l2e_64(unsigned long va, l2_pgentry_t sl2e,
- int create_l2_shadow)
+ int create_l2_shadow, int put_ref_check)
{
struct vcpu *v = current;
l4_pgentry_t sl4e;
@@ -2608,6 +2624,17 @@
printk("For non VMX shadow, create_l1_shadow:%d\n",
create_l2_shadow);
}
shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
+
+ }
+
+ if ( put_ref_check ) {
+ l2_pgentry_t tmp_sl2e;
+ if ( __shadow_get_l2e(v, va, &tmp_sl2e) ) {
+ if ( l2e_get_flags(tmp_sl2e) & _PAGE_PRESENT )
+ if ( l2e_get_pfn(tmp_sl2e) == l2e_get_pfn(sl2e) ) {
+ put_shadow_ref(l2e_get_pfn(sl2e));
+ }
+ }
}
@@ -2681,7 +2708,7 @@
l1_pgentry_t old_sl1e;
l2_pgentry_t sl2e;
unsigned long nx = 0;
-
+ int put_ref_check = 0;
/* Check if gpfn is 2M aligned */
/* Update guest l2e */
@@ -2712,6 +2739,7 @@
l2e_get_pfn(sl2e) == l1_mfn) {
ESH_LOG("sl2e PRSENT bit is set: %lx, l1_mfn = %lx\n",
l2e_get_pfn(sl2e), l1_mfn);
} else {
+ put_ref_check = 1;
if (!get_shadow_ref(l1_mfn))
BUG();
}
@@ -2735,7 +2763,7 @@
ESH_LOG("<%s>: sl2e = %lx\n", __func__, l2e_get_intpte(sl2e));
/* Map the page to l2*/
- shadow_set_l2e_64(va, sl2e, 1);
+ shadow_set_l2e_64(va, sl2e, 1, put_ref_check);
if (l2e_get_flags(gl2e) & _PAGE_NX)
l2e_add_flags(tmp_l2e, _PAGE_NX);
@@ -2900,10 +2928,14 @@
static void shadow_invlpg_64(struct vcpu *v, unsigned long va)
{
struct domain *d = v->domain;
- //l1_pgentry_64_t gl1e, sl1e;
- l1_pgentry_t sl1e;
+ l1_pgentry_t sl1e, old_sl1e;
shadow_lock(d);
+
+ if ( __shadow_get_l1e(v, va, &old_sl1e) )
+ if ( l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
+ put_page_from_l1e(old_sl1e, d);
+
sl1e = l1e_empty();
__shadow_set_l1e(v, va, &sl1e);
diff -r a4196568095c -r b53a65034532 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/shadow32.c Fri Jul 29 20:25:03 2005
@@ -2612,7 +2612,7 @@
if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
{
- if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) )
+ if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) )
{
allow_writes = 1;
l1e_add_flags(gpte, _PAGE_RW);
diff -r a4196568095c -r b53a65034532 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/smpboot.c Fri Jul 29 20:25:03 2005
@@ -40,6 +40,7 @@
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/delay.h>
+#include <xen/softirq.h>
#include <asm/current.h>
#include <asm/mc146818rtc.h>
#include <asm/desc.h>
@@ -406,6 +407,7 @@
*/
if (cpu_has_tsc && cpu_khz)
synchronize_tsc_ap();
+ calibrate_tsc_ap();
}
int cpucount;
@@ -464,6 +466,8 @@
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
+
+ init_percpu_time();
wmb();
startup_cpu_idle_loop();
@@ -1149,6 +1153,7 @@
*/
if (cpu_has_tsc && cpucount && cpu_khz)
synchronize_tsc_bp();
+ calibrate_tsc_bp();
}
/* These are wrappers to interface to the new boot process. Someone
@@ -1167,22 +1172,21 @@
int __devinit __cpu_up(unsigned int cpu)
{
/* This only works at boot for x86. See "rewrite" above. */
- if (cpu_isset(cpu, smp_commenced_mask)) {
- local_irq_enable();
+ if (cpu_isset(cpu, smp_commenced_mask))
return -ENOSYS;
- }
/* In case one didn't come up */
- if (!cpu_isset(cpu, cpu_callin_map)) {
- local_irq_enable();
+ if (!cpu_isset(cpu, cpu_callin_map))
return -EIO;
- }
-
- local_irq_enable();
+
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
- while (!cpu_isset(cpu, cpu_online_map))
+ while (!cpu_isset(cpu, cpu_online_map)) {
mb();
+ if (softirq_pending(0))
+ do_softirq();
+ }
+
return 0;
}
diff -r a4196568095c -r b53a65034532 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/time.c Fri Jul 29 20:25:03 2005
@@ -1,16 +1,12 @@
-/****************************************************************************
- * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
- * (C) 2002-2003 University of Cambridge
- ****************************************************************************
- *
- * File: i386/time.c
- * Author: Rolf Neugebar & Keir Fraser
- */
-
-/*
- * linux/arch/i386/kernel/time.c
- *
- * Copyright (C) 1991, 1992, 1995 Linus Torvalds
+/******************************************************************************
+ * arch/x86/time.c
+ *
+ * Per-CPU time calibration and management.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ *
+ * Portions from Linux are:
+ * Copyright (c) 1991, 1992, 1995 Linus Torvalds
*/
#include <xen/config.h>
@@ -31,29 +27,84 @@
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <asm/mc146818rtc.h>
-
-/* GLOBAL */
+#include <asm/div64.h>
+#include <asm/hpet.h>
+#include <io_ports.h>
+
+/* opt_hpet_force: If true, force HPET configuration via PCI space. */
+/* NB. This is a gross hack. Mainly useful for HPET testing. */
+static int opt_hpet_force = 0;
+boolean_param("hpet_force", opt_hpet_force);
+
+#define EPOCH MILLISECS(1000)
+
unsigned long cpu_khz; /* CPU clock frequency in kHz. */
+unsigned long hpet_address;
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
int timer_ack = 0;
unsigned long volatile jiffies;
-
-/* PRIVATE */
-static unsigned int rdtsc_bitshift; /* Which 32 bits of TSC do we use? */
-static u64 cpu_freq; /* CPU frequency (Hz) */
-static u32 st_scale_f; /* Cycles -> ns, fractional part */
-static u32 st_scale_i; /* Cycles -> ns, integer part */
-static u32 shifted_tsc_irq; /* CPU0's TSC at last 'time update' */
-static u64 full_tsc_irq; /* ...ditto, but all 64 bits */
-static s_time_t stime_irq; /* System time at last 'time update' */
-static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
-static rwlock_t time_lock = RW_LOCK_UNLOCKED;
+static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
+
+struct time_scale {
+ int shift;
+ u32 mul_frac;
+};
+
+struct cpu_time {
+ u64 local_tsc_stamp;
+ s_time_t stime_local_stamp;
+ s_time_t stime_master_stamp;
+ struct time_scale tsc_scale;
+ struct ac_timer calibration_timer;
+} __cacheline_aligned;
+
+static struct cpu_time cpu_time[NR_CPUS];
+
+/* Protected by platform_timer_lock. */
+static s_time_t stime_platform_stamp;
+static u64 platform_timer_stamp;
+static struct time_scale platform_timer_scale;
+static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
+static u64 (*read_platform_count)(void);
+
+static inline u32 down_shift(u64 time, int shift)
+{
+ if ( shift < 0 )
+ return (u32)(time >> -shift);
+ return (u32)((u32)time << shift);
+}
+
+/*
+ * 32-bit division of integer dividend and integer divisor yielding
+ * 32-bit fractional quotient.
+ */
+static inline u32 div_frac(u32 dividend, u32 divisor)
+{
+ u32 quotient, remainder;
+ ASSERT(dividend < divisor);
+ __asm__ (
+ "div %4"
+ : "=a" (quotient), "=d" (remainder)
+ : "0" (0), "1" (dividend), "r" (divisor) );
+ return quotient;
+}
+
+/*
+ * 32-bit multiplication of multiplicand and fractional multiplier
+ * yielding 32-bit product (radix point at same position as in multiplicand).
+ */
+static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
+{
+ u32 product_int, product_frac;
+ __asm__ (
+ "mul %3"
+ : "=a" (product_frac), "=d" (product_int)
+ : "0" (multiplicand), "r" (multiplier) );
+ return product_int;
+}
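+
+/*
+ * Worked example (illustrative): div_frac(1, 2) == 0x80000000, the
+ * fixed-point encoding of 1/2, and mul_frac(1000000, 0x80000000) ==
+ * 500000, i.e. the high 32 bits of the 64-bit product recover
+ * "multiplicand * 1/2".
+ */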
void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
{
- write_lock_irq(&time_lock);
-
-#ifdef CONFIG_X86_IO_APIC
if ( timer_ack )
{
extern spinlock_t i8259A_lock;
@@ -63,30 +114,9 @@
inb(0x20);
spin_unlock(&i8259A_lock);
}
-#endif
- /*
- * Updates TSC timestamp (used to interpolate passage of time between
- * interrupts).
- */
- rdtscll(full_tsc_irq);
- shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
-
/* Update jiffies counter. */
(*(unsigned long *)&jiffies)++;
-
- /* Update wall time. */
- wc_usec += 1000000/HZ;
- if ( wc_usec >= 1000000 )
- {
- wc_usec -= 1000000;
- wc_sec++;
- }
-
- /* Updates system time (nanoseconds since boot). */
- stime_irq += MILLISECS(1000/HZ);
-
- write_unlock_irq(&time_lock);
/* Rough hack to allow accurate timers to sort-of-work with no APIC. */
if ( !cpu_has_apic )
@@ -103,9 +133,9 @@
#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
-static unsigned long __init calibrate_tsc(void)
-{
- u64 start, end, diff;
+static u64 calibrate_boot_tsc(void)
+{
+ u64 start, end;
unsigned long count;
/* Set the Gate high, disable speaker */
@@ -118,9 +148,9 @@
* terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
* to begin countdown.
*/
- outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
- outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
- outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */
+ outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
+ outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
rdtscll(start);
for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
@@ -131,15 +161,368 @@
if ( count == 0 )
return 0;
- diff = end - start;
-
-#if defined(__i386__)
- /* If quotient doesn't fit in 32 bits then we return error (zero). */
- if ( diff & ~0xffffffffULL )
+ return ((end - start) * (u64)CALIBRATE_FRAC);
+}
+
+static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
+{
+ u64 tps64 = ticks_per_sec;
+ u32 tps32;
+ int shift = 0;
+
+ while ( tps64 > (MILLISECS(1000)*2) )
+ {
+ tps64 >>= 1;
+ shift--;
+ }
+
+ tps32 = (u32)tps64;
+ while ( tps32 < (u32)MILLISECS(1000) )
+ {
+ tps32 <<= 1;
+ shift++;
+ }
+
+ ts->mul_frac = div_frac(MILLISECS(1000), tps32);
+ ts->shift = shift;
+}
+
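+/*
+ * Worked example (illustrative): for a hypothetical 2GHz TSC,
+ * ticks_per_sec == 2000000000, neither loop runs, and we store
+ * shift == 0, mul_frac == div_frac(1000000000, 2000000000) == 0x80000000.
+ * A tick delta then scales to nanoseconds as roughly
+ * ((u64)down_shift(delta, shift) * mul_frac) >> 32, i.e. delta / 2 here.
+ */
+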
+static atomic_t tsc_calibrate_gang = ATOMIC_INIT(0);
+static unsigned int tsc_calibrate_status = 0;
+
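+/*
+ * Boot-time TSC calibration rendezvous: each AP increments
+ * tsc_calibrate_gang and spins; once every booting CPU has arrived the
+ * BP runs PIT channel 2 for one CALIBRATE_FRAC period, stepping
+ * tsc_calibrate_status to 1 (countdown started) and 2 (countdown done)
+ * so that all APs sample their TSCs at the same two instants and can
+ * derive their own tsc_scale.
+ */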
+void calibrate_tsc_bp(void)
+{
+ while ( atomic_read(&tsc_calibrate_gang) != (num_booting_cpus() - 1) )
+ mb();
+
+ outb(CALIBRATE_LATCH & 0xff, PIT_CH2);
+ outb(CALIBRATE_LATCH >> 8, PIT_CH2);
+
+ tsc_calibrate_status = 1;
+ wmb();
+
+ while ( (inb(0x61) & 0x20) == 0 )
+ continue;
+
+ tsc_calibrate_status = 2;
+ wmb();
+
+ while ( atomic_read(&tsc_calibrate_gang) != 0 )
+ mb();
+}
+
+void calibrate_tsc_ap(void)
+{
+ u64 t1, t2, ticks_per_sec;
+
+ atomic_inc(&tsc_calibrate_gang);
+
+ while ( tsc_calibrate_status < 1 )
+ mb();
+
+ rdtscll(t1);
+
+ while ( tsc_calibrate_status < 2 )
+ mb();
+
+ rdtscll(t2);
+
+ ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC;
+ set_time_scale(&cpu_time[smp_processor_id()].tsc_scale, ticks_per_sec);
+
+ atomic_dec(&tsc_calibrate_gang);
+}
+
+static char *freq_string(u64 freq)
+{
+ static char s[20];
+ unsigned int x, y;
+ y = (unsigned int)do_div(freq, 1000000) / 1000;
+ x = (unsigned int)freq;
+ sprintf(s, "%u.%03uMHz", x, y);
+ return s;
+}
+
+/************************************************************
+ * PLATFORM TIMER 1: PROGRAMMABLE INTERVAL TIMER (LEGACY PIT)
+ */
+
+/* Protected by platform_timer_lock. */
+static u64 pit_counter64;
+static u16 pit_stamp;
+static struct ac_timer pit_overflow_timer;
+
+static u16 pit_read_counter(void)
+{
+ u16 count;
+ ASSERT(spin_is_locked(&platform_timer_lock));
+ outb(0x80, PIT_MODE);
+ count = inb(PIT_CH2);
+ count |= inb(PIT_CH2) << 8;
+ return count;
+}
+
+static void pit_overflow(void *unused)
+{
+ u16 counter;
+
+ spin_lock(&platform_timer_lock);
+ counter = pit_read_counter();
+ pit_counter64 += (u16)(pit_stamp - counter);
+ pit_stamp = counter;
+ spin_unlock(&platform_timer_lock);
+
+ set_ac_timer(&pit_overflow_timer, NOW() + MILLISECS(20));
+}
+
+static u64 read_pit_count(void)
+{
+ return pit_counter64 + (u16)(pit_stamp - pit_read_counter());
+}
+
+static int init_pit(void)
+{
+ read_platform_count = read_pit_count;
+
+ init_ac_timer(&pit_overflow_timer, pit_overflow, NULL, 0);
+ pit_overflow(NULL);
+ platform_timer_stamp = pit_counter64;
+ set_time_scale(&platform_timer_scale, CLOCK_TICK_RATE);
+
+ printk("Platform timer is %s PIT\n", freq_string(CLOCK_TICK_RATE));
+
+ return 1;
+}
+
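The PIT bookkeeping relies on unsigned 16-bit modular arithmetic: channel 2 counts down, so (u16)(pit_stamp - counter) is the number of ticks elapsed since the last fold, and it stays correct across a counter wrap provided the fold happens more often than the wrap period. Assuming the usual 1.193182 MHz PIT clock, the 16-bit counter wraps every 65536 / 1193182 s, about 54.9 ms, while pit_overflow() re-folds every 20 ms (roughly 23,900 ticks), leaving a comfortable margin.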
+/************************************************************
+ * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
+ */
+
+/* Protected by platform_timer_lock. */
+static u64 hpet_counter64, hpet_overflow_period;
+static u32 hpet_stamp;
+static struct ac_timer hpet_overflow_timer;
+
+static void hpet_overflow(void *unused)
+{
+ u32 counter;
+
+ spin_lock(&platform_timer_lock);
+ counter = hpet_read32(HPET_COUNTER);
+ hpet_counter64 += (u32)(counter - hpet_stamp);
+ hpet_stamp = counter;
+ spin_unlock(&platform_timer_lock);
+
+ set_ac_timer(&hpet_overflow_timer, NOW() + hpet_overflow_period);
+}
+
+static u64 read_hpet_count(void)
+{
+ return hpet_counter64 + (u32)(hpet_read32(HPET_COUNTER) - hpet_stamp);
+}
+
+static int init_hpet(void)
+{
+ u64 hpet_rate;
+ u32 hpet_id, hpet_period, cfg;
+ int i;
+
+ if ( (hpet_address == 0) && opt_hpet_force )
+ {
+ outl(0x800038a0, 0xcf8);
+ outl(0xff000001, 0xcfc);
+ outl(0x800038a0, 0xcf8);
+ hpet_address = inl(0xcfc) & 0xfffffffe;
+ printk("WARNING: Forcibly enabled HPET at %#lx.\n", hpet_address);
+ }
+
+ if ( hpet_address == 0 )
return 0;
-#endif
-
- return (unsigned long)diff;
+
+ set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
+
+ hpet_id = hpet_read32(HPET_ID);
+ if ( hpet_id == 0 )
+ {
+ printk("BAD HPET vendor id.\n");
+ return 0;
+ }
+
+ /* Check for sane period (100ps <= period <= 100ns). */
+ hpet_period = hpet_read32(HPET_PERIOD);
+ if ( (hpet_period > 100000000) || (hpet_period < 100000) )
+ {
+ printk("BAD HPET period %u.\n", hpet_period);
+ return 0;
+ }
+
+ cfg = hpet_read32(HPET_CFG);
+ cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+ hpet_write32(cfg, HPET_CFG);
+
+ for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ )
+ {
+ cfg = hpet_read32(HPET_T0_CFG + i*0x20);
+ cfg &= ~HPET_TN_ENABLE;
+ hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG);
+ }
+
+ cfg = hpet_read32(HPET_CFG);
+ cfg |= HPET_CFG_ENABLE;
+ hpet_write32(cfg, HPET_CFG);
+
+ read_platform_count = read_hpet_count;
+
+ hpet_rate = 1000000000000000ULL; /* 10^15 */
+ (void)do_div(hpet_rate, hpet_period);
+ set_time_scale(&platform_timer_scale, hpet_rate);
+
+ /* Trigger overflow avoidance roughly when counter increments 2^31. */
+ if ( (hpet_rate >> 31) != 0 )
+ {
+ hpet_overflow_period = MILLISECS(1000);
+ (void)do_div(hpet_overflow_period, (u32)(hpet_rate >> 31) + 1);
+ }
+ else
+ {
+ hpet_overflow_period = MILLISECS(1000) << 31;
+ (void)do_div(hpet_overflow_period, (u32)hpet_rate);
+ }
+
+ init_ac_timer(&hpet_overflow_timer, hpet_overflow, NULL, 0);
+ hpet_overflow(NULL);
+ platform_timer_stamp = hpet_counter64;
+
+ printk("Platform timer is %s HPET\n", freq_string(hpet_rate));
+
+ return 1;
+}
+
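For a typical 14.318 MHz HPET (hpet_period around 69,841,279 fs) the computed hpet_rate is well below 2^31, so the second branch applies: hpet_overflow_period = (10^9 ns << 31) / 14,318,180, roughly 1.5 x 10^11 ns. The hardware counter is therefore folded into the 64-bit software value about every 150 seconds, half of the roughly 300 seconds it takes the 32-bit reading to wrap.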
+/************************************************************
+ * PLATFORM TIMER 3: IBM 'CYCLONE' TIMER
+ */
+
+int use_cyclone;
+
+/*
+ * Although the counter is read via a 64-bit register, I believe it is actually
+ * a 40-bit counter. Since this will wrap, I read only the low 32 bits and
+ * periodically fold into a 64-bit software counter, just as for PIT and HPET.
+ */
+#define CYCLONE_CBAR_ADDR 0xFEB00CD0
+#define CYCLONE_PMCC_OFFSET 0x51A0
+#define CYCLONE_MPMC_OFFSET 0x51D0
+#define CYCLONE_MPCS_OFFSET 0x51A8
+#define CYCLONE_TIMER_FREQ 100000000
+
+/* Protected by platform_timer_lock. */
+static u64 cyclone_counter64;
+static u32 cyclone_stamp;
+static struct ac_timer cyclone_overflow_timer;
+static volatile u32 *cyclone_timer; /* Cyclone MPMC0 register */
+
+static void cyclone_overflow(void *unused)
+{
+ u32 counter;
+
+ spin_lock(&platform_timer_lock);
+ counter = *cyclone_timer;
+ cyclone_counter64 += (u32)(counter - cyclone_stamp);
+ cyclone_stamp = counter;
+ spin_unlock(&platform_timer_lock);
+
+ set_ac_timer(&cyclone_overflow_timer, NOW() + MILLISECS(20000));
+}
+
+static u64 read_cyclone_count(void)
+{
+ return cyclone_counter64 + (u32)(*cyclone_timer - cyclone_stamp);
+}
+
+static volatile u32 *map_cyclone_reg(unsigned long regaddr)
+{
+ unsigned long pageaddr = regaddr & PAGE_MASK;
+ unsigned long offset = regaddr & ~PAGE_MASK;
+ set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
+ return (volatile u32 *)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
+}
+
+static int init_cyclone(void)
+{
+ u32 base;
+
+ if ( !use_cyclone )
+ return 0;
+
+ /* Find base address. */
+ base = *(map_cyclone_reg(CYCLONE_CBAR_ADDR));
+ if ( base == 0 )
+ {
+ printk(KERN_ERR "Cyclone: Could not find valid CBAR value.\n");
+ return 0;
+ }
+
+ /* Enable timer and map the counter register. */
+ *(map_cyclone_reg(base + CYCLONE_PMCC_OFFSET)) = 1;
+ *(map_cyclone_reg(base + CYCLONE_MPCS_OFFSET)) = 1;
+ cyclone_timer = map_cyclone_reg(base + CYCLONE_MPMC_OFFSET);
+
+ read_platform_count = read_cyclone_count;
+
+ init_ac_timer(&cyclone_overflow_timer, cyclone_overflow, NULL, 0);
+ cyclone_overflow(NULL);
+ platform_timer_stamp = cyclone_counter64;
+ set_time_scale(&platform_timer_scale, CYCLONE_TIMER_FREQ);
+
+ printk("Platform timer is %s IBM Cyclone\n",
+ freq_string(CYCLONE_TIMER_FREQ));
+
+ return 1;
+}
+
+/************************************************************
+ * GENERIC PLATFORM TIMER INFRASTRUCTURE
+ */
+
+static s_time_t __read_platform_stime(u64 platform_time)
+{
+ u64 diff64 = platform_time - platform_timer_stamp;
+ u32 diff = down_shift(diff64, platform_timer_scale.shift);
+ ASSERT(spin_is_locked(&platform_timer_lock));
+ return (stime_platform_stamp +
+ (u64)mul_frac(diff, platform_timer_scale.mul_frac));
+}
+
+static s_time_t read_platform_stime(void)
+{
+ u64 counter;
+ s_time_t stime;
+
+ spin_lock(&platform_timer_lock);
+ counter = read_platform_count();
+ stime = __read_platform_stime(counter);
+ spin_unlock(&platform_timer_lock);
+
+ return stime;
+}
+
+static void platform_time_calibration(void)
+{
+ u64 counter;
+ s_time_t stamp;
+
+ spin_lock(&platform_timer_lock);
+ counter = read_platform_count();
+ stamp = __read_platform_stime(counter);
+ stime_platform_stamp = stamp;
+ platform_timer_stamp = counter;
+ spin_unlock(&platform_timer_lock);
+}
+
+static void init_platform_timer(void)
+{
+ if ( !init_cyclone() && !init_hpet() )
+ BUG_ON(!init_pit());
}
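All three probes honour the same contract: point read_platform_count at a function returning a monotonic 64-bit count, describe its frequency through platform_timer_scale, and arm a periodic fold against overflow; init_platform_timer() then tries them in priority order. As a hedged sketch of how a fourth source would slot in (init_foo, foo_present, read_foo_count and FOO_TIMER_FREQ are placeholder names, not symbols in this patch):

    static u64 read_foo_count(void);            /* placeholder counter reader */

    static int init_foo(void)
    {
        if ( !foo_present() )                   /* placeholder detection test */
            return 0;
        read_platform_count = read_foo_count;
        set_time_scale(&platform_timer_scale, FOO_TIMER_FREQ);
        printk("Platform timer is %s FOO\n", freq_string(FOO_TIMER_FREQ));
        return 1;
    }

It would be chained into init_platform_timer() ahead of the PIT fallback, and would need its own periodic fold if the underlying counter is narrower than 64 bits.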
@@ -233,140 +616,226 @@
* System Time
***************************************************************************/
-static inline u64 get_time_delta(void)
-{
- s32 delta_tsc;
- u32 low;
- u64 delta, tsc;
-
- ASSERT(st_scale_f || st_scale_i);
+s_time_t get_s_time(void)
+{
+ struct cpu_time *t = &cpu_time[smp_processor_id()];
+ u64 tsc;
+ u32 delta;
+ s_time_t now;
rdtscll(tsc);
- low = (u32)(tsc >> rdtsc_bitshift);
- delta_tsc = (s32)(low - shifted_tsc_irq);
- if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
- delta = ((u64)delta_tsc * st_scale_f);
- delta >>= 32;
- delta += ((u64)delta_tsc * st_scale_i);
-
- return delta;
-}
-
-s_time_t get_s_time(void)
-{
- s_time_t now;
- unsigned long flags;
-
- read_lock_irqsave(&time_lock, flags);
-
- now = stime_irq + get_time_delta();
-
- /* Ensure that the returned system time is monotonically increasing. */
- {
- static s_time_t prev_now = 0;
- if ( unlikely(now < prev_now) )
- now = prev_now;
- prev_now = now;
- }
-
- read_unlock_irqrestore(&time_lock, flags);
-
- return now;
+ delta = down_shift(tsc - t->local_tsc_stamp, t->tsc_scale.shift);
+ now = t->stime_local_stamp + (u64)mul_frac(delta, t->tsc_scale.mul_frac);
+
+ return now;
}
static inline void __update_dom_time(struct vcpu *v)
{
- struct domain *d = v->domain;
- shared_info_t *si = d->shared_info;
-
- spin_lock(&d->time_lock);
-
- si->time_version1++;
+ struct cpu_time *t = &cpu_time[smp_processor_id()];
+ struct vcpu_time_info *u = &v->domain->shared_info->vcpu_time[v->vcpu_id];
+
+ u->time_version1++;
wmb();
- si->cpu_freq = cpu_freq;
- si->tsc_timestamp = full_tsc_irq;
- si->system_time = stime_irq;
- si->wc_sec = wc_sec;
- si->wc_usec = wc_usec;
+ u->tsc_timestamp = t->local_tsc_stamp;
+ u->system_time = t->stime_local_stamp;
+ u->tsc_to_system_mul = t->tsc_scale.mul_frac;
+ u->tsc_shift = (s8)t->tsc_scale.shift;
wmb();
- si->time_version2++;
-
- spin_unlock(&d->time_lock);
+ u->time_version2++;
+
+ /* Should only do this during do_settime(). */
+ v->domain->shared_info->wc_sec = wc_sec;
+ v->domain->shared_info->wc_usec = wc_usec;
}
void update_dom_time(struct vcpu *v)
{
- unsigned long flags;
-
- if ( v->domain->shared_info->tsc_timestamp != full_tsc_irq )
- {
- read_lock_irqsave(&time_lock, flags);
+ if ( v->domain->shared_info->vcpu_time[v->vcpu_id].tsc_timestamp !=
+ cpu_time[smp_processor_id()].local_tsc_stamp )
__update_dom_time(v);
- read_unlock_irqrestore(&time_lock, flags);
- }
}
/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
{
- s64 delta;
- long _usecs = (long)usecs;
-
- write_lock_irq(&time_lock);
-
- delta = (s64)(stime_irq - system_time_base);
-
- _usecs += (long)(delta/1000);
- while ( _usecs >= 1000000 )
- {
- _usecs -= 1000000;
- secs++;
- }
-
- wc_sec = secs;
- wc_usec = _usecs;
-
- /* Others will pick up the change at the next tick. */
+ u64 x, base_usecs;
+ u32 y;
+
+ base_usecs = system_time_base;
+ do_div(base_usecs, 1000);
+
+ x = (secs * 1000000ULL) + (u64)usecs + base_usecs;
+ y = do_div(x, 1000000);
+
+ wc_sec = (unsigned long)x;
+ wc_usec = (unsigned long)y;
+
__update_dom_time(current);
- send_guest_virq(current, VIRQ_TIMER);
-
- write_unlock_irq(&time_lock);
-}
-
+}
+
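A quick numeric check of the new do_settime() arithmetic: with secs = 1000, usecs = 900000 and system_time_base = 250,000,000 ns, base_usecs becomes 250,000, x = 1000 x 10^6 + 900,000 + 250,000 = 1,001,150,000 microseconds, and do_div() splits that into wc_sec = 1001 and wc_usec = 150,000.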
+static void local_time_calibration(void *unused)
+{
+ unsigned int cpu = smp_processor_id();
+
+ /*
+ * System timestamps, extrapolated from local and master oscillators,
+ * taken during this calibration and the previous calibration.
+ */
+ s_time_t prev_local_stime, curr_local_stime;
+ s_time_t prev_master_stime, curr_master_stime;
+
+ /* TSC timestamps taken during this calibration and prev calibration. */
+ u64 prev_tsc, curr_tsc;
+
+ /*
+ * System time and TSC ticks elapsed during the previous calibration
+ * 'epoch'. These values are down-shifted to fit in 32 bits.
+ */
+ u64 stime_elapsed64, tsc_elapsed64;
+ u32 stime_elapsed32, tsc_elapsed32;
+
+ /* The accumulated error in the local estimate. */
+ u64 local_stime_err;
+
+ /* Error correction to slow down a fast local clock. */
+ u32 error_factor = 0;
+
+ /* Calculated TSC shift to ensure 32-bit scale multiplier. */
+ int tsc_shift = 0;
+
+ /* The overall calibration scale multiplier. */
+ u32 calibration_mul_frac;
+
+ prev_tsc = cpu_time[cpu].local_tsc_stamp;
+ prev_local_stime = cpu_time[cpu].stime_local_stamp;
+ prev_master_stime = cpu_time[cpu].stime_master_stamp;
+
+ /* Disable IRQs to get 'instantaneous' current timestamps. */
+ local_irq_disable();
+ rdtscll(curr_tsc);
+ curr_local_stime = get_s_time();
+ curr_master_stime = read_platform_stime();
+ local_irq_enable();
+
+#if 0
+ printk("PRE%d: tsc=%lld stime=%lld master=%lld\n",
+ cpu, prev_tsc, prev_local_stime, prev_master_stime);
+ printk("CUR%d: tsc=%lld stime=%lld master=%lld -> %lld\n",
+ cpu, curr_tsc, curr_local_stime, curr_master_stime,
+ curr_master_stime - curr_local_stime);
+#endif
+
+ /* Local time warps forward if it lags behind master time. */
+ if ( curr_local_stime < curr_master_stime )
+ curr_local_stime = curr_master_stime;
+
+ stime_elapsed64 = curr_master_stime - prev_master_stime;
+ tsc_elapsed64 = curr_tsc - prev_tsc;
+
+ /*
+ * Calculate error-correction factor. This only slows down a fast local
+ * clock (slow clocks are warped forwards). The scale factor is clamped
+ * to >= 0.5.
+ */
+ if ( curr_local_stime != curr_master_stime )
+ {
+ local_stime_err = curr_local_stime - curr_master_stime;
+ if ( local_stime_err > EPOCH )
+ local_stime_err = EPOCH;
+ error_factor = div_frac(EPOCH, EPOCH + (u32)local_stime_err);
+ }
+
+ /*
+ * We require 0 < stime_elapsed < 2^31.
+ * This allows us to binary shift a 32-bit tsc_elapsed such that:
+ * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
+ */
+ while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
+ ((s32)stime_elapsed64 < 0) )
+ {
+ stime_elapsed64 >>= 1;
+ tsc_elapsed64 >>= 1;
+ }
+
+ /* stime_elapsed now fits in a 32-bit word. */
+ stime_elapsed32 = (u32)stime_elapsed64;
+
+ /* tsc_elapsed <= 2*stime_elapsed */
+ while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
+ {
+ tsc_elapsed64 >>= 1;
+ tsc_shift--;
+ }
+
+ /* Local difference must now fit in 32 bits. */
+ ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
+ tsc_elapsed32 = (u32)tsc_elapsed64;
+
+ /* tsc_elapsed > stime_elapsed */
+ ASSERT(tsc_elapsed32 != 0);
+ while ( tsc_elapsed32 <= stime_elapsed32 )
+ {
+ tsc_elapsed32 <<= 1;
+ tsc_shift++;
+ }
+
+ calibration_mul_frac = div_frac(stime_elapsed32, tsc_elapsed32);
+ if ( error_factor != 0 )
+ calibration_mul_frac = mul_frac(calibration_mul_frac, error_factor);
+
+#if 0
+ printk("---%d: %08x %08x %d\n", cpu,
+ error_factor, calibration_mul_frac, tsc_shift);
+#endif
+
+ /* Record new timestamp information. */
+ cpu_time[cpu].tsc_scale.mul_frac = calibration_mul_frac;
+ cpu_time[cpu].tsc_scale.shift = tsc_shift;
+ cpu_time[cpu].local_tsc_stamp = curr_tsc;
+ cpu_time[cpu].stime_local_stamp = curr_local_stime;
+ cpu_time[cpu].stime_master_stamp = curr_master_stime;
+
+ set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
+
+ if ( cpu == 0 )
+ platform_time_calibration();
+}
+
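To see the error correction at work, assume the calibration EPOCH is one second. If the local estimate finishes an epoch 1 ms ahead of the platform timer, local_stime_err = 10^6 ns and error_factor = div_frac(10^9, 10^9 + 10^6), about 0.999 in 0.32 fixed point, so the freshly computed calibration_mul_frac is scaled down by roughly 0.1%. The fast local clock therefore converges back toward platform time over the next few epochs instead of ever being stepped backwards.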
+void init_percpu_time(void)
+{
+ unsigned int cpu = smp_processor_id();
+ unsigned long flags;
+ s_time_t now;
+
+ local_irq_save(flags);
+ rdtscll(cpu_time[cpu].local_tsc_stamp);
+ now = (cpu == 0) ? 0 : read_platform_stime();
+ local_irq_restore(flags);
+
+ cpu_time[cpu].stime_master_stamp = now;
+ cpu_time[cpu].stime_local_stamp = now;
+
+ init_ac_timer(&cpu_time[cpu].calibration_timer,
+ local_time_calibration, NULL, cpu);
+ set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
+}
/* Late init function (after all CPUs are booted). */
-int __init init_xen_time()
-{
- u64 scale;
- unsigned int cpu_ghz;
-
- cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
- for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
- continue;
-
- scale = 1000000000LL << (32 + rdtsc_bitshift);
- scale /= cpu_freq;
- st_scale_f = scale & 0xffffffff;
- st_scale_i = scale >> 32;
+int __init init_xen_time(void)
+{
+ wc_sec = get_cmos_time();
local_irq_disable();
- /* System time ticks from zero. */
- rdtscll(full_tsc_irq);
- stime_irq = (s_time_t)0;
- shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
-
- /* Wallclock time starts as the initial RTC time. */
- wc_sec = get_cmos_time();
+ init_percpu_time();
+
+ stime_platform_stamp = 0;
+ init_platform_timer();
local_irq_enable();
-
- printk("Time init:\n");
- printk(".... cpu_freq: %08X:%08X\n", (u32)(cpu_freq>>32),(u32)cpu_freq);
- printk(".... scale: %08X:%08X\n", (u32)(scale>>32),(u32)scale);
- printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_usec);
return 0;
}
@@ -375,15 +844,12 @@
/* Early init function. */
void __init early_time_init(void)
{
- unsigned long ticks_per_frac = calibrate_tsc();
-
- if ( !ticks_per_frac )
- panic("Error calibrating TSC\n");
-
- cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
-
- cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC;
-
+ u64 tmp = calibrate_boot_tsc();
+
+ set_time_scale(&cpu_time[0].tsc_scale, tmp);
+
+ do_div(tmp, 1000);
+ cpu_khz = (unsigned long)tmp;
printk("Detected %lu.%03lu MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
diff -r a4196568095c -r b53a65034532 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/traps.c Fri Jul 29 20:25:03 2005
@@ -40,6 +40,7 @@
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
+#include <xen/symbols.h>
#include <asm/shadow.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -100,7 +101,7 @@
static int debug_stack_lines = 20;
integer_param("debug_stack_lines", debug_stack_lines);
-static inline int kernel_text_address(unsigned long addr)
+int is_kernel_text(unsigned long addr)
{
extern char _stext, _etext;
if (addr >= (unsigned long) &_stext &&
@@ -110,6 +111,12 @@
}
+unsigned long kernel_text_end(void)
+{
+ extern char _etext;
+ return (unsigned long) &_etext;
+}
+
void show_guest_stack(void)
{
int i;
@@ -150,11 +157,12 @@
while ( ((long) stack & (STACK_SIZE-1)) != 0 )
{
addr = *stack++;
- if ( kernel_text_address(addr) )
+ if ( is_kernel_text(addr) )
{
if ( (i != 0) && ((i % 6) == 0) )
printk("\n ");
- printk("[<%p>] ", _p(addr));
+ printk("[<%p>]", _p(addr));
+ print_symbol(" %s\n", addr);
i++;
}
}
@@ -177,10 +185,7 @@
if ( (i != 0) && ((i % 8) == 0) )
printk("\n ");
addr = *stack++;
- if ( kernel_text_address(addr) )
- printk("[%p] ", _p(addr));
- else
- printk("%p ", _p(addr));
+ printk("%p ", _p(addr));
}
if ( i == 0 )
printk("Stack empty.");
diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/vmx.c Fri Jul 29 20:25:03 2005
@@ -38,7 +38,7 @@
#include <asm/vmx_vmcs.h>
#include <asm/vmx_intercept.h>
#include <asm/shadow.h>
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
#include <asm/shadow_64.h>
#endif
@@ -94,12 +94,16 @@
msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
break
-#define CASE_WRITE_MSR(address) \
- case MSR_ ## address: \
- msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
- if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)){ \
- set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \
- }\
+#define CASE_WRITE_MSR(address) \
+ case MSR_ ## address: \
+ { \
+ msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
+ if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) { \
+ set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \
+ } \
+ wrmsrl(MSR_ ## address, msr_content); \
+ set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags); \
+ } \
break
#define IS_CANO_ADDRESS(add) 1
@@ -604,11 +608,6 @@
addr = (exit_qualification >> 16) & (0xffff);
else
addr = regs->edx & 0xffff;
-
- if (addr == 0x80) {
- __update_guest_eip(inst_len);
- return;
- }
vio = get_vio(d->domain, d->vcpu_id);
if (vio == 0) {
@@ -1261,6 +1260,7 @@
CASE_SET_REG(EBP, ebp);
CASE_SET_REG(ESI, esi);
CASE_SET_REG(EDI, edi);
+ CASE_EXTEND_SET_REG
case REG_ESP:
__vmwrite(GUEST_RSP, value);
regs->esp = value;
diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/vmx_intercept.c Fri Jul 29 20:25:03 2005
@@ -24,10 +24,10 @@
#include <asm/vmx_virpit.h>
#include <asm/vmx_intercept.h>
#include <public/io/ioreq.h>
-
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <io_ports.h>
#ifdef CONFIG_VMX
@@ -175,7 +175,7 @@
p->port_mm)
return 0;
- if (p->addr == 0x43 &&
+ if (p->addr == PIT_MODE &&
p->dir == 0 && /* write */
((p->u.data >> 4) & 0x3) == 0 && /* latch command */
((p->u.data >> 6) & 0x3) == (vpit->channel)) {/* right channel */
@@ -183,7 +183,7 @@
return 1;
}
- if (p->addr == (0x40 + vpit->channel) &&
+ if (p->addr == (PIT_CH0 + vpit->channel) &&
p->dir == 1) { /* read */
p->u.data = pit_read_io(vpit);
resume_pit_io(p);
@@ -197,12 +197,23 @@
static void pit_timer_fn(void *data)
{
struct vmx_virpit_t *vpit = data;
+ s_time_t next;
+ int missed_ticks;
+
+ missed_ticks = (NOW() - vpit->scheduled) / MILLISECS(vpit->period);
/* Set the pending intr bit, and send evtchn notification to myself. */
if (test_and_set_bit(vpit->vector, vpit->intr_bitmap))
vpit->pending_intr_nr++; /* already set, then count the pending intr */
- set_ac_timer(&vpit->pit_timer, NOW() + MILLISECS(vpit->period));
+ /* pick up missed timer tick */
+ if ( missed_ticks > 0 ) {
+ vpit->pending_intr_nr+= missed_ticks;
+ vpit->scheduled += missed_ticks * MILLISECS(vpit->period);
+ }
+ next = vpit->scheduled + MILLISECS(vpit->period);
+ set_ac_timer(&vpit->pit_timer, next);
+ vpit->scheduled = next;
}
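A worked example of the catch-up logic: with a 10 ms virtual PIT period, if the callback runs 35 ms after vpit->scheduled then missed_ticks = 35 / 10 = 3, three extra interrupts are credited to pending_intr_nr, scheduled advances by 30 ms, and the timer is re-armed 10 ms after that. The guest eventually receives every tick it was owed rather than silently losing ticks whenever the host runs the callback late.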
@@ -263,7 +274,8 @@
vpit->intr_bitmap = intr;
- set_ac_timer(&vpit->pit_timer, NOW() + MILLISECS(vpit->period));
+ vpit->scheduled = NOW() + MILLISECS(vpit->period);
+ set_ac_timer(&vpit->pit_timer, vpit->scheduled);
/*restore the state*/
p->state = STATE_IORESP_READY;
diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/vmx_io.c Fri Jul 29 20:25:03 2005
@@ -39,14 +39,20 @@
#ifdef CONFIG_VMX
#if defined (__i386__)
-static void load_cpu_user_regs(struct cpu_user_regs *regs)
+void load_cpu_user_regs(struct cpu_user_regs *regs)
{
/*
* Write the guest register value into VMCS
*/
__vmwrite(GUEST_SS_SELECTOR, regs->ss);
__vmwrite(GUEST_RSP, regs->esp);
+
__vmwrite(GUEST_RFLAGS, regs->eflags);
+ if (regs->eflags & EF_TF)
+ __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+ else
+ __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+
__vmwrite(GUEST_CS_SELECTOR, regs->cs);
__vmwrite(GUEST_RIP, regs->eip);
}
@@ -175,11 +181,17 @@
}
}
#else
-static void load_cpu_user_regs(struct cpu_user_regs *regs)
+void load_cpu_user_regs(struct cpu_user_regs *regs)
{
__vmwrite(GUEST_SS_SELECTOR, regs->ss);
__vmwrite(GUEST_RSP, regs->rsp);
+
__vmwrite(GUEST_RFLAGS, regs->rflags);
+ if (regs->rflags & EF_TF)
+ __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+ else
+ __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+
__vmwrite(GUEST_CS_SELECTOR, regs->cs);
__vmwrite(GUEST_RIP, regs->rip);
}
diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx_platform.c
--- a/xen/arch/x86/vmx_platform.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/vmx_platform.c Fri Jul 29 20:25:03 2005
@@ -32,7 +32,7 @@
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
#include <asm/shadow_64.h>
#endif
#ifdef CONFIG_VMX
diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/vmx_vmcs.c Fri Jul 29 20:25:03 2005
@@ -59,9 +59,11 @@
free_xenheap_pages(vmcs, order);
}
-static inline int construct_vmcs_controls(void)
+static inline int construct_vmcs_controls(struct arch_vmx_struct *arch_vmx)
{
int error = 0;
+ void *io_bitmap_a;
+ void *io_bitmap_b;
error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
MONITOR_PIN_BASED_EXEC_CONTROLS);
@@ -72,6 +74,20 @@
error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);
+
+ /* need to use 0x1000 instead of PAGE_SIZE */
+ io_bitmap_a = (void*) alloc_xenheap_pages(get_order(0x1000));
+ io_bitmap_b = (void*) alloc_xenheap_pages(get_order(0x1000));
+ memset(io_bitmap_a, 0xff, 0x1000);
+ /* don't intercept accesses to the debug port */
+ clear_bit(PC_DEBUG_PORT, io_bitmap_a);
+ memset(io_bitmap_b, 0xff, 0x1000);
+
+ error |= __vmwrite(IO_BITMAP_A, (u64) virt_to_phys(io_bitmap_a));
+ error |= __vmwrite(IO_BITMAP_B, (u64) virt_to_phys(io_bitmap_b));
+
+ arch_vmx->io_bitmap_a = io_bitmap_a;
+ arch_vmx->io_bitmap_b = io_bitmap_b;
return error;
}
@@ -190,10 +206,14 @@
vmx_setup_platform(v, regs);
+ __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory");
+ host_env.idtr_limit = desc.size;
+ host_env.idtr_base = desc.address;
+ error |= __vmwrite(HOST_IDTR_BASE, host_env.idtr_base);
+
__asm__ __volatile__ ("sgdt (%0) \n" :: "a"(&desc) : "memory");
host_env.gdtr_limit = desc.size;
host_env.gdtr_base = desc.address;
-
error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);
error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
@@ -351,7 +371,6 @@
{
int error = 0;
unsigned long crn;
- struct Xgt_desc_struct desc;
/* Host Selectors */
host_env->ds_selector = __HYPERVISOR_DS;
@@ -377,14 +396,7 @@
host_env->ds_base = 0;
host_env->cs_base = 0;
-/* Debug */
- __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory");
- host_env->idtr_limit = desc.size;
- host_env->idtr_base = desc.address;
- error |= __vmwrite(HOST_IDTR_BASE, host_env->idtr_base);
-
__asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );
-
host_env->cr0 = crn;
error |= __vmwrite(HOST_CR0, crn); /* same CR0 */
@@ -392,6 +404,7 @@
__asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : );
host_env->cr4 = crn;
error |= __vmwrite(HOST_CR4, crn);
+
error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
#ifdef __x86_64__
/* TBD: support cr8 for 64-bit guest */
@@ -435,7 +448,7 @@
(unsigned long) vmcs_phys_ptr);
return -EINVAL;
}
- if ((error = construct_vmcs_controls())) {
+ if ((error = construct_vmcs_controls(arch_vmx))) {
printk("construct_vmcs: construct_vmcs_controls failed\n");
return -EINVAL;
}
@@ -455,6 +468,35 @@
printk("construct_vmcs: setting Exception bitmap failed\n");
return -EINVAL;
}
+
+ if (regs->eflags & EF_TF)
+ __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+ else
+ __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+
+ return 0;
+}
+
+/*
+ * modify guest eflags and exception bitmap for gdb
+ */
+int modify_vmcs(struct arch_vmx_struct *arch_vmx,
+ struct cpu_user_regs *regs)
+{
+ int error;
+ u64 vmcs_phys_ptr, old, old_phys_ptr;
+ vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);
+
+ old_phys_ptr = virt_to_phys(&old);
+ __vmptrst(old_phys_ptr);
+ if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
+ printk("modify_vmcs: load_vmcs failed: VMCS = %lx\n",
+ (unsigned long) vmcs_phys_ptr);
+ return -EINVAL;
+ }
+ load_cpu_user_regs(regs);
+
+ __vmptrld(old_phys_ptr);
return 0;
}
diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/x86_32/mm.c Fri Jul 29 20:25:03 2005
@@ -102,7 +102,7 @@
mpt_size = 4*1024*1024;
for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
{
- if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER)) == NULL )
+ if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
panic("Not enough memory to bootstrap Xen.\n");
idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i] =
l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE);
diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/x86_32/traps.c Fri Jul 29 20:25:03 2005
@@ -6,6 +6,7 @@
#include <xen/console.h>
#include <xen/mm.h>
#include <xen/irq.h>
+#include <xen/symbols.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/vmx.h>
@@ -63,10 +64,10 @@
}
}
- printk("CPU: %d\nEIP: %04lx:[<%08lx>] \nEFLAGS: %08lx "
- "CONTEXT: %s\n",
- smp_processor_id(), (unsigned long)0xffff & regs->cs,
- eip, eflags, context);
+ printk("CPU: %d\nEIP: %04lx:[<%08lx>]",
+ smp_processor_id(), (unsigned long)0xffff & regs->cs, eip);
+ print_symbol(" %s\n", eip);
+ printk("EFLAGS: %08lx CONTEXT: %s\n", eflags, context);
printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
regs->eax, regs->ebx, regs->ecx, regs->edx);
printk("esi: %08x edi: %08x ebp: %08x esp: %08lx\n",
@@ -119,8 +120,10 @@
/* Find information saved during fault and dump it to the console. */
tss = &init_tss[cpu];
- printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
- cpu, tss->cs, tss->eip, tss->eflags);
+ printk("CPU: %d\nEIP: %04x:[<%08x>]",
+ cpu, tss->cs, tss->eip);
+ print_symbol(" %s\n", tss->eip);
+ printk("EFLAGS: %08x\n", tss->eflags);
printk("CR3: %08x\n", tss->__cr3);
printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
tss->eax, tss->ebx, tss->ecx, tss->edx);
diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/x86_64/entry.S Fri Jul 29 20:25:03 2005
@@ -587,6 +587,7 @@
.quad do_boot_vcpu
.quad do_set_segment_base /* 25 */
.quad do_mmuext_op
+ .quad do_policy_op
.rept NR_hypercalls-((.-hypercall_table)/4)
.quad do_ni_hypercall
.endr
diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/x86_64/mm.c Fri Jul 29 20:25:03 2005
@@ -100,7 +100,7 @@
*/
for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
{
- pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER);
+ pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0);
if ( pg == NULL )
panic("Not enough memory for m2p table\n");
map_pages_to_xen(
diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Fri Jul 29 18:52:33 2005
+++ b/xen/arch/x86/x86_64/traps.c Fri Jul 29 20:25:03 2005
@@ -6,6 +6,7 @@
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/irq.h>
+#include <xen/symbols.h>
#include <xen/console.h>
#include <xen/sched.h>
#include <asm/current.h>
@@ -14,8 +15,10 @@
void show_registers(struct cpu_user_regs *regs)
{
- printk("CPU: %d\nEIP: %04x:[<%016lx>] \nEFLAGS: %016lx\n",
- smp_processor_id(), 0xffff & regs->cs, regs->rip, regs->eflags);
+ printk("CPU: %d\nEIP: %04x:[<%016lx>]",
+ smp_processor_id(), 0xffff & regs->cs, regs->rip);
+ print_symbol(" %s\n", regs->rip);
+ printk("EFLAGS: %016lx\n", regs->eflags);
printk("rax: %016lx rbx: %016lx rcx: %016lx rdx: %016lx\n",
regs->rax, regs->rbx, regs->rcx, regs->rdx);
printk("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
diff -r a4196568095c -r b53a65034532 xen/common/ac_timer.c
--- a/xen/common/ac_timer.c Fri Jul 29 18:52:33 2005
+++ b/xen/common/ac_timer.c Fri Jul 29 20:25:03 2005
@@ -202,7 +202,7 @@
do {
heap = ac_timers[cpu].heap;
now = NOW();
-
+
while ( (GET_HEAP_SIZE(heap) != 0) &&
((t = heap[1])->expires < (now + TIMER_SLOP)) )
{
diff -r a4196568095c -r b53a65034532 xen/common/dom_mem_ops.c
--- a/xen/common/dom_mem_ops.c Fri Jul 29 18:52:33 2005
+++ b/xen/common/dom_mem_ops.c Fri Jul 29 20:25:03 2005
@@ -37,7 +37,8 @@
unsigned long *extent_list,
unsigned long start_extent,
unsigned int nr_extents,
- unsigned int extent_order)
+ unsigned int extent_order,
+ unsigned int flags)
{
struct pfn_info *page;
unsigned long i;
@@ -56,7 +57,8 @@
{
PREEMPT_CHECK(MEMOP_increase_reservation);
- if ( unlikely((page = alloc_domheap_pages(d, extent_order)) == NULL) )
+ if ( unlikely((page = alloc_domheap_pages(d, extent_order,
+ flags)) == NULL) )
{
DPRINTK("Could not allocate a frame\n");
return i;
@@ -131,10 +133,15 @@
{
struct domain *d;
unsigned long rc, start_extent;
+ unsigned int address_bits_order;
/* Extract @start_extent from @op. */
start_extent = op >> START_EXTENT_SHIFT;
op &= (1 << START_EXTENT_SHIFT) - 1;
+
+ /* separate extent_order and address_bits_order */
+ address_bits_order = (extent_order >> 8) & 0xff;
+ extent_order &= 0xff;
if ( unlikely(start_extent > nr_extents) )
return -EINVAL;
@@ -150,7 +157,8 @@
{
case MEMOP_increase_reservation:
rc = alloc_dom_mem(
- d, extent_list, start_extent, nr_extents, extent_order);
+ d, extent_list, start_extent, nr_extents, extent_order,
+ (address_bits_order <= 32) ? ALLOC_DOM_DMA : 0);
break;
case MEMOP_decrease_reservation:
rc = free_dom_mem(
diff -r a4196568095c -r b53a65034532 xen/common/domain.c
--- a/xen/common/domain.c Fri Jul 29 18:52:33 2005
+++ b/xen/common/domain.c Fri Jul 29 20:25:03 2005
@@ -39,10 +39,8 @@
atomic_set(&d->refcnt, 1);
atomic_set(&v->pausecnt, 0);
- d->domain_id = dom_id;
- v->processor = cpu;
-
- spin_lock_init(&d->time_lock);
+ d->domain_id = dom_id;
+ v->processor = cpu;
spin_lock_init(&d->big_lock);
diff -r a4196568095c -r b53a65034532 xen/common/grant_table.c
--- a/xen/common/grant_table.c Fri Jul 29 18:52:33 2005
+++ b/xen/common/grant_table.c Fri Jul 29 20:25:03 2005
@@ -809,6 +809,146 @@
}
#endif
+static long
+gnttab_donate(gnttab_donate_t *uop, unsigned int count)
+{
+ struct domain *d = current->domain;
+ struct domain *e;
+ struct pfn_info *page;
+ u32 _d, _nd, x, y;
+ int i;
+ int result = GNTST_okay;
+
+ for (i = 0; i < count; i++) {
+ gnttab_donate_t *gop = &uop[i];
+#if GRANT_DEBUG
+ printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+ i, gop->mfn, gop->domid, gop->handle);
+#endif
+ page = &frame_table[gop->mfn];
+
+ if (unlikely(IS_XEN_HEAP_FRAME(page))) {
+ printk("gnttab_donate: xen heap frame mfn=%lx\n", (unsigned long)
gop->mfn);
+ gop->status = GNTST_bad_virt_addr;
+ continue;
+ }
+ if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+ printk("gnttab_donate: invalid pfn for mfn=%lx\n", (unsigned long)
gop->mfn);
+ gop->status = GNTST_bad_virt_addr;
+ continue;
+ }
+ if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+ printk("gnttab_donate: can't find domain %d\n", gop->domid);
+ gop->status = GNTST_bad_domain;
+ continue;
+ }
+
+ spin_lock(&d->page_alloc_lock);
+
+ /*
+ * The tricky bit: atomically release ownership while
+ * there is just one benign reference to the page
+ * (PGC_allocated). If that reference disappears then the
+ * deallocation routine will safely spin.
+ */
+ _d = pickle_domptr(d);
+ _nd = page->u.inuse._domain;
+ y = page->count_info;
+ do {
+ x = y;
+ if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+ (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+ printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+ " caf=%08x, taf=%08x\n", (void *) page_to_pfn(page),
+ d, d->domain_id, unpickle_domptr(_nd), x,
+ page->u.inuse.type_info);
+ spin_unlock(&d->page_alloc_lock);
+ put_domain(e);
+ return 0;
+ }
+ __asm__ __volatile__(
+ LOCK_PREFIX "cmpxchg8b %2"
+ : "=d" (_nd), "=a" (y),
+ "=m" (*(volatile u64 *)(&page->count_info))
+ : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+ } while (unlikely(_nd != _d) || unlikely(y != x));
+
+ /*
+ * Unlink from 'd'. At least one reference remains (now
+     * anonymous), so no one else is spinning to try to delete
+ * this page from 'd'.
+ */
+ d->tot_pages--;
+ list_del(&page->list);
+
+ spin_unlock(&d->page_alloc_lock);
+
+ spin_lock(&e->page_alloc_lock);
+
+ /*
+ * Check that 'e' will accept the page and has reservation
+ * headroom. Also, a domain mustn't have PGC_allocated
+ * pages when it is dying.
+ */
+#ifdef GRANT_DEBUG
+ if (unlikely(e->tot_pages >= e->max_pages)) {
+ printk("gnttab_dontate: no headroom tot_pages=%d max_pages=%d\n",
+ e->tot_pages, e->max_pages);
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+ if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags))) {
+ printk("gnttab_donate: target domain is dying\n");
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+ if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+ printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+#else
+ ASSERT(e->tot_pages <= e->max_pages);
+ if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
+ unlikely(e->tot_pages == e->max_pages) ||
+ unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+ printk("gnttab_donate: Transferee has no reservation headroom
(%d,%d), or "
+ "provided a bad grant ref (%08x), or is dying (%p).\n",
+ e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+#endif
+ /* Okay, add the page to 'e'. */
+ if (unlikely(e->tot_pages++ == 0)) {
+ get_knownalive_domain(e);
+ }
+ list_add_tail(&page->list, &e->page_list);
+ page_set_owner(page, e);
+
+ spin_unlock(&e->page_alloc_lock);
+
+ /*
+ * Transfer is all done: tell the guest about its new page
+ * frame.
+ */
+ gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+
+ put_domain(e);
+
+ gop->status = GNTST_okay;
+ }
+ return result;
+}
+
long
do_grant_table_op(
unsigned int cmd, void *uop, unsigned int count)
@@ -843,6 +983,11 @@
rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
break;
#endif
+ case GNTTABOP_donate:
+ if (unlikely(!array_access_ok(uop, count, sizeof(gnttab_donate_t))))
+ goto out;
+ rc = gnttab_donate(uop, count);
+ break;
default:
rc = -ENOSYS;
break;
@@ -902,6 +1047,9 @@
for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
{
map = &lgt->maptrack[handle];
+
+ if ( map->domid != rd->domain_id )
+ continue;
if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) &&
( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly))))
diff -r a4196568095c -r b53a65034532 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Fri Jul 29 18:52:33 2005
+++ b/xen/common/page_alloc.c Fri Jul 29 20:25:03 2005
@@ -207,7 +207,13 @@
#define MEMZONE_XEN 0
#define MEMZONE_DOM 1
-#define NR_ZONES 2
+#define MEMZONE_DMADOM 2
+#define NR_ZONES 3
+
+
+#define MAX_DMADOM_PFN 0xFFFFF
+#define pfn_dom_zone_type(_pfn) \
+ (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)
/* Up to 2^20 pages can be allocated at once. */
#define MAX_ORDER 20
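MAX_DMADOM_PFN is 0xFFFFF, so with 4 KiB frames the new DMA zone covers exactly the first 2^20 x 4 KiB = 4 GiB of machine memory; pfn_dom_zone_type() steers frames at or below that boundary into MEMZONE_DMADOM and everything above it into MEMZONE_DOM. As the alloc_domheap_pages() change later in this patch shows, ordinary allocations prefer MEMZONE_DOM and only fall back to (or are forced into, via ALLOC_DOM_DMA) the DMA zone, keeping sub-4 GiB memory available for callers that genuinely need it.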
@@ -236,7 +242,7 @@
if ( next_free )
map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
if ( curr_free )
- free_heap_pages(MEMZONE_DOM, pfn_to_page(i), 0);
+ free_heap_pages(pfn_dom_zone_type(i), pfn_to_page(i), 0);
}
}
@@ -351,10 +357,10 @@
void scrub_heap_pages(void)
{
void *p;
- unsigned long pfn, flags;
+ unsigned long pfn;
+ int cpu = smp_processor_id();
printk("Scrubbing Free RAM: ");
- watchdog_disable();
for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
{
@@ -362,12 +368,15 @@
if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
printk(".");
+ if ( unlikely(softirq_pending(cpu)) )
+ do_softirq();
+
/* Quick lock-free check. */
if ( allocated_in_map(pfn) )
continue;
-
- spin_lock_irqsave(&heap_lock, flags);
-
+
+ spin_lock_irq(&heap_lock);
+
/* Re-check page status with lock held. */
if ( !allocated_in_map(pfn) )
{
@@ -385,11 +394,10 @@
unmap_domain_page(p);
}
}
-
- spin_unlock_irqrestore(&heap_lock, flags);
- }
-
- watchdog_enable();
+
+ spin_unlock_irq(&heap_lock);
+ }
+
printk("done.\n");
}
@@ -472,14 +480,21 @@
{
ASSERT(!in_irq());
- ps = round_pgup(ps);
- pe = round_pgdown(pe);
-
- init_heap_pages(MEMZONE_DOM, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
-}
-
-
-struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
+ ps = round_pgup(ps) >> PAGE_SHIFT;
+ pe = round_pgdown(pe) >> PAGE_SHIFT;
+
+ if (ps < MAX_DMADOM_PFN && pe > MAX_DMADOM_PFN) {
+ init_heap_pages(MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
+ init_heap_pages(MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN),
+ pe - MAX_DMADOM_PFN);
+ }
+ else
+ init_heap_pages(pfn_dom_zone_type(ps), pfn_to_page(ps), pe - ps);
+}
+
+
+struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order,
+ unsigned int flags)
{
struct pfn_info *pg;
cpumask_t mask;
@@ -487,8 +502,13 @@
ASSERT(!in_irq());
- if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) )
- return NULL;
+ pg = NULL;
+ if (! (flags & ALLOC_DOM_DMA))
+ pg = alloc_heap_pages(MEMZONE_DOM, order);
+ if (pg == NULL) {
+ if ( unlikely((pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL) )
+ return NULL;
+ }
mask = pg->u.free.cpumask;
tlbflush_filter(mask, pg->tlbflush_timestamp);
@@ -529,7 +549,7 @@
DPRINTK("...or the domain is dying (%d)\n",
!!test_bit(_DOMF_dying, &d->domain_flags));
spin_unlock(&d->page_alloc_lock);
- free_heap_pages(MEMZONE_DOM, pg, order);
+ free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order);
return NULL;
}
@@ -594,7 +614,7 @@
if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
{
- free_heap_pages(MEMZONE_DOM, pg, order);
+ free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order);
}
else
{
@@ -614,7 +634,7 @@
else
{
/* Freeing an anonymous domain-heap page. */
- free_heap_pages(MEMZONE_DOM, pg, order);
+ free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order);
drop_dom_ref = 0;
}
@@ -625,7 +645,7 @@
unsigned long avail_domheap_pages(void)
{
- return avail[MEMZONE_DOM];
+ return avail[MEMZONE_DOM] + avail[MEMZONE_DMADOM];
}
@@ -674,7 +694,7 @@
p = map_domain_page(page_to_pfn(pg));
clear_page(p);
unmap_domain_page(p);
- free_heap_pages(MEMZONE_DOM, pg, 0);
+ free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, 0);
}
} while ( (NOW() - start) < MILLISECS(1) );
}
diff -r a4196568095c -r b53a65034532 xen/common/policy_ops.c
--- a/xen/common/policy_ops.c Fri Jul 29 18:52:33 2005
+++ b/xen/common/policy_ops.c Fri Jul 29 20:25:03 2005
@@ -36,11 +36,6 @@
}
#else
-
-/* function prototypes defined in acm/acm_policy.c */
-int acm_set_policy(void *buf, u16 buf_size, u16 policy);
-int acm_get_policy(void *buf, u16 buf_size);
-int acm_dump_statistics(void *buf, u16 buf_size);
typedef enum policyoperation {
POLICY, /* access to policy interface (early drop) */
@@ -89,7 +84,8 @@
ret = acm_set_policy(
op->u.setpolicy.pushcache,
op->u.setpolicy.pushcache_size,
- op->u.setpolicy.policy_type);
+ op->u.setpolicy.policy_type,
+ 1);
if (ret == ACM_OK)
ret = 0;
else
diff -r a4196568095c -r b53a65034532 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Fri Jul 29 18:52:33 2005
+++ b/xen/common/sched_sedf.c Fri Jul 29 20:25:03 2005
@@ -609,15 +609,16 @@
inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
inf->slice;
else
- /*give a domain w/ exweight = 1 as much as a domain with
- util = 1/128*/
+		/*conversion between realtime utilisation and extraweight:
+ full (ie 100%) utilization is equivalent to 128 extraweight*/
inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
}
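A quick check of the equivalence stated in the new comment above: a fully loaded real-time domain (slice equal to period) scores (period << 10) / slice = 2^10, and a pure extra-time domain with extraweight = 128 scores 2^17 / 128 = 2^10 as well, so 100% utilisation and an extraweight of 128 are indeed ranked identically on the EXTRA_UTIL_Q.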
check_extra_queues:
/* Adding a runnable domain to the right queue and removing blocked ones*/
if (sedf_runnable(d)) {
/*add according to score: weighted round robin*/
- if (inf->status & (EXTRA_AWARE | EXTRA_WANT_PEN_Q))
+ if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
+ ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
extraq_add_sort_update(d, i, oldscore);
}
else {
@@ -627,12 +628,9 @@
/*make sure that we remove a blocked domain from the other
extraq too*/
if (i == EXTRA_PEN_Q) {
- if (extraq_on(d, EXTRA_UTIL_Q))
- extraq_del(d, EXTRA_UTIL_Q);
- }
- else {
- if (extraq_on(d, EXTRA_PEN_Q))
- extraq_del(d, EXTRA_PEN_Q);
+ if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
+ } else {
+ if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
}
#endif
}
@@ -668,7 +666,8 @@
if (!list_empty(extraq[EXTRA_UTIL_Q])) {
/*use elements from the normal extraqueue*/
runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
-				     struct sedf_vcpu_info, extralist[EXTRA_UTIL_Q]);
+ struct sedf_vcpu_info,
+ extralist[EXTRA_UTIL_Q]);
runinf->status |= EXTRA_RUN_UTIL;
ret.task = runinf->vcpu;
ret.time = EXTRA_QUANTUM;
@@ -943,8 +942,7 @@
inf->status |= EXTRA_WANT_PEN_Q;
/*(re-)add domain to the penalty extraq*/
- extraq_add_sort_update(inf->vcpu,
- EXTRA_PEN_Q, 0);
+ extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
}
}
/*give it a fresh slice in the next period!*/
@@ -1119,7 +1117,8 @@
s_time_t now = NOW();
struct sedf_vcpu_info* inf = EDOM_INFO(d);
- PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
d->vcpu_id);
+ PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
+ d->vcpu_id);
if (unlikely(is_idle_task(d->domain)))
return;
@@ -1145,7 +1144,7 @@
inf->block_tot++;
#endif
if (unlikely(now < PERIOD_BEGIN(inf))) {
- PRINT(4,"extratime unblock\n");
+ PRINT(4,"extratime unblock\n");
/* unblocking in extra-time! */
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
if (inf->status & EXTRA_WANT_PEN_Q) {
@@ -1226,6 +1225,9 @@
/*check whether the awakened task needs to invoke the do_schedule
routine. Try to avoid unnecessary runs but:
Save approximation: Always switch to scheduler!*/
+ ASSERT(d->processor >= 0);
+ ASSERT(d->processor < NR_CPUS);
+ ASSERT(schedule_data[d->processor].curr);
if (should_switch(schedule_data[d->processor].curr, d, now))
cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
diff -r a4196568095c -r b53a65034532 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Fri Jul 29 18:52:33 2005
+++ b/xen/drivers/char/console.c Fri Jul 29 20:25:03 2005
@@ -635,8 +635,6 @@
debugtrace_bytes = bytes;
- memset(debugtrace_buf, '\0', debugtrace_bytes);
-
return 0;
}
__initcall(debugtrace_init);
diff -r a4196568095c -r b53a65034532 xen/include/acm/acm_core.h
--- a/xen/include/acm/acm_core.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/acm/acm_core.h Fri Jul 29 20:25:03 2005
@@ -113,6 +113,9 @@
/* protos */
int acm_init_domain_ssid(domid_t id, ssidref_t ssidref);
int acm_free_domain_ssid(struct acm_ssid_domain *ssid);
+int acm_set_policy(void *buf, u16 buf_size, u16 policy, int isuserbuffer);
+int acm_get_policy(void *buf, u16 buf_size);
+int acm_dump_statistics(void *buf, u16 buf_size);
#endif
diff -r a4196568095c -r b53a65034532 xen/include/acm/acm_hooks.h
--- a/xen/include/acm/acm_hooks.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/acm/acm_hooks.h Fri Jul 29 20:25:03 2005
@@ -24,6 +24,7 @@
#include <xen/lib.h>
#include <xen/delay.h>
#include <xen/sched.h>
+#include <xen/multiboot.h>
#include <public/acm.h>
#include <acm/acm_core.h>
#include <public/dom0_ops.h>
@@ -136,7 +137,9 @@
{ return 0; }
static inline int acm_pre_grant_setup(domid_t id)
{ return 0; }
-static inline int acm_init(void)
+static inline int acm_init(unsigned int *initrdidx,
+ const multiboot_info_t *mbi,
+ unsigned long start)
{ return 0; }
static inline void acm_post_domain0_create(domid_t domid)
{ return; }
@@ -337,7 +340,9 @@
acm_post_domain_create(domid, ACM_DOM0_SSIDREF);
}
-extern int acm_init(void);
+extern int acm_init(unsigned int *initrdidx,
+ const multiboot_info_t *mbi,
+ unsigned long start);
#endif
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/apicdef.h
--- a/xen/include/asm-x86/apicdef.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/apicdef.h Fri Jul 29 20:25:03 2005
@@ -108,10 +108,11 @@
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-#ifdef CONFIG_NUMA
- #define MAX_IO_APICS 32
+/* These limits are dictated by ES7000 hardware. */
+#ifdef __i386__
+ #define MAX_IO_APICS 65
#else
- #define MAX_IO_APICS 8
+ #define MAX_IO_APICS 129
#endif
/*
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/config.h Fri Jul 29 20:25:03 2005
@@ -23,6 +23,7 @@
#define CONFIG_X86_LOCAL_APIC 1
#define CONFIG_X86_GOOD_APIC 1
#define CONFIG_X86_IO_APIC 1
+#define CONFIG_HPET_TIMER 1
/* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
#define CONFIG_X86_L1_CACHE_SHIFT 7
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/fixmap.h
--- a/xen/include/asm-x86/fixmap.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/fixmap.h Fri Jul 29 20:25:03 2005
@@ -30,6 +30,8 @@
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
FIX_ACPI_BEGIN,
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+ FIX_HPET_BASE,
+ FIX_CYCLONE_TIMER,
__end_of_fixed_addresses
};
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/genapic.h
--- a/xen/include/asm-x86/genapic.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/genapic.h Fri Jul 29 20:25:03 2005
@@ -30,7 +30,6 @@
unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid);
unsigned long (*check_apicid_present)(int apicid);
int no_balance_irq;
- int no_ioapic_check;
void (*init_apic_ldr)(void);
physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map);
@@ -78,7 +77,6 @@
.int_delivery_mode = INT_DELIVERY_MODE, \
.int_dest_mode = INT_DEST_MODE, \
.no_balance_irq = NO_BALANCE_IRQ, \
- .no_ioapic_check = NO_IOAPIC_CHECK, \
.ESR_DISABLE = esr_disable, \
.apic_destination_logical = APIC_DEST_LOGICAL, \
APICFUNC(apic_id_registered), \
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-bigsmp/mach_apic.h
--- a/xen/include/asm-x86/mach-bigsmp/mach_apic.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/mach-bigsmp/mach_apic.h Fri Jul 29 20:25:03 2005
@@ -13,8 +13,6 @@
#define NO_BALANCE_IRQ (1)
#define esr_disable (1)
-
-#define NO_IOAPIC_CHECK (0)
static inline int apic_id_registered(void)
{
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-default/mach_apic.h
--- a/xen/include/asm-x86/mach-default/mach_apic.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/mach-default/mach_apic.h Fri Jul 29 20:25:03 2005
@@ -18,8 +18,6 @@
#define NO_BALANCE_IRQ (0)
#define esr_disable (0)
-
-#define NO_IOAPIC_CHECK (0)
#define INT_DELIVERY_MODE dest_LowestPrio
#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-es7000/mach_apic.h
--- a/xen/include/asm-x86/mach-es7000/mach_apic.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/mach-es7000/mach_apic.h Fri Jul 29 20:25:03 2005
@@ -37,8 +37,6 @@
#define APIC_DEST_LOGICAL 0x0
#define WAKE_SECONDARY_VIA_INIT
#endif
-
-#define NO_IOAPIC_CHECK (1)
static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
{
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-generic/mach_apic.h
--- a/xen/include/asm-x86/mach-generic/mach_apic.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/mach-generic/mach_apic.h Fri Jul 29 20:25:03 2005
@@ -5,7 +5,6 @@
#define esr_disable (genapic->ESR_DISABLE)
#define NO_BALANCE_IRQ (genapic->no_balance_irq)
-#define NO_IOAPIC_CHECK (genapic->no_ioapic_check)
#define INT_DELIVERY_MODE (genapic->int_delivery_mode)
#define INT_DEST_MODE (genapic->int_dest_mode)
#undef APIC_DEST_LOGICAL
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-summit/mach_apic.h
--- a/xen/include/asm-x86/mach-summit/mach_apic.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/mach-summit/mach_apic.h Fri Jul 29 20:25:03 2005
@@ -6,8 +6,6 @@
#define esr_disable (1)
#define NO_BALANCE_IRQ (0)
-
-#define NO_IOAPIC_CHECK (1) /* Don't check I/O APIC ID for xAPIC */
/* In clustered mode, the high nibble of APIC ID is a cluster number.
* The low nibble is a 4-bit bitmap. */
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-summit/mach_mpparse.h
--- a/xen/include/asm-x86/mach-summit/mach_mpparse.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/mach-summit/mach_mpparse.h Fri Jul 29 20:25:03 2005
@@ -30,7 +30,7 @@
(!strncmp(productid, "VIGIL SMP", 9)
|| !strncmp(productid, "EXA", 3)
|| !strncmp(productid, "RUTHLESS SMP", 12))){
- /*use_cyclone = 1;*/ /*enable cyclone-timer*/
+ use_cyclone = 1; /*enable cyclone-timer*/
setup_summit();
/*usb_early_handoff = 1;*/
return 1;
@@ -44,7 +44,7 @@
if (!strncmp(oem_id, "IBM", 3) &&
(!strncmp(oem_table_id, "SERVIGIL", 8)
|| !strncmp(oem_table_id, "EXA", 3))){
- /*use_cyclone = 1;*/ /*enable cyclone-timer*/
+ use_cyclone = 1; /*enable cyclone-timer*/
setup_summit();
/*usb_early_handoff = 1;*/
return 1;
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/shadow.h Fri Jul 29 20:25:03 2005
@@ -131,12 +131,12 @@
unsigned long pa, l2_pgentry_t l2e,
struct domain_mmap_cache *cache);
#if CONFIG_PAGING_LEVELS >= 3
+#include <asm/page-guest32.h>
extern void shadow_l3_normal_pt_update(struct domain *d,
unsigned long pa, l3_pgentry_t l3e,
struct domain_mmap_cache *cache);
#endif
#if CONFIG_PAGING_LEVELS >= 4
-#include <asm/page-guest32.h>
extern void shadow_l4_normal_pt_update(struct domain *d,
unsigned long pa, l4_pgentry_t l4e,
struct domain_mmap_cache *cache);
@@ -631,82 +631,6 @@
}
#endif
-#if CONFIG_PAGING_LEVELS == 3
-/* dummy functions, PAE has no shadow support yet */
-
-static inline void
-__shadow_get_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
-{
- BUG();
-}
-
-static inline void
-__shadow_set_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t value)
-{
- BUG();
-}
-
-static inline void
-__guest_get_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t *pl2e)
-{
- BUG();
-}
-
-static inline void
-__guest_set_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t value)
-{
- BUG();
-}
-
-static inline void shadow_drop_references(
- struct domain *d, struct pfn_info *page)
-{
- if ( likely(!shadow_mode_refcounts(d)) ||
- ((page->u.inuse.type_info & PGT_count_mask) == 0) )
- return;
- BUG();
-}
-
-static inline void shadow_sync_and_drop_references(
- struct domain *d, struct pfn_info *page)
-{
- if ( likely(!shadow_mode_refcounts(d)) )
- return;
- BUG();
-}
-
-static inline int l1pte_write_fault(
- struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
- unsigned long va)
-{
- BUG();
- return 42;
-}
-
-static inline int l1pte_read_fault(
- struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
-{
- BUG();
- return 42;
-}
-
-void static inline
-shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
-{
- BUG();
-}
-
-static inline unsigned long gva_to_gpa(unsigned long gva)
-{
- BUG();
- return 42;
-}
-#endif
-
/************************************************************************/
/*
@@ -1691,8 +1615,10 @@
/************************************************************************/
static inline int
-shadow_mode_page_writable(struct domain *d, unsigned long gpfn)
-{
+shadow_mode_page_writable(unsigned long va, struct cpu_user_regs *regs, unsigned long gpfn)
+{
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
unsigned long mfn = __gpfn_to_mfn(d, gpfn);
u32 type = frame_table[mfn].u.inuse.type_info & PGT_type_mask;
@@ -1701,11 +1627,14 @@
type = shadow_max_pgtable_type(d, gpfn, NULL);
if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
- (type == PGT_l1_page_table) )
+ (type == PGT_l1_page_table) &&
+ (va < HYPERVISOR_VIRT_START) &&
+ KERNEL_MODE(v, regs) )
return 1;
if ( shadow_mode_write_all(d) &&
- type && (type <= PGT_l4_page_table) )
+ type && (type <= PGT_l4_page_table) &&
+ KERNEL_MODE(v, regs) )
return 1;
return 0;
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/shadow_64.h Fri Jul 29 20:25:03 2005
@@ -85,8 +85,10 @@
            return (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1));
case 3:
            return (((va) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1));
+#if CONFIG_PAGING_LEVELS >= 4
case 4:
            return (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1));
+#endif
default:
//printk("<table_offset_64> level %d is too big\n", level);
return -1;
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/shadow_public.h
--- a/xen/include/asm-x86/shadow_public.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/shadow_public.h Fri Jul 29 20:25:03 2005
@@ -21,7 +21,7 @@
#ifndef _XEN_SHADOW_PUBLIC_H
#define _XEN_SHADOW_PUBLIC_H
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
#define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
extern int alloc_p2m_table(struct domain *d);
@@ -30,10 +30,6 @@
struct domain *d, struct pfn_info *page);
extern void shadow_drop_references(
struct domain *d, struct pfn_info *page);
-
-extern void shadow_l4_normal_pt_update(struct domain *d,
- unsigned long pa, l4_pgentry_t l4e,
- struct domain_mmap_cache *cache);
extern int shadow_set_guest_paging_levels(struct domain *d, int levels);
@@ -56,4 +52,10 @@
};
#endif
+#if CONFIG_PAGING_LEVELS >= 4
+extern void shadow_l4_normal_pt_update(struct domain *d,
+ unsigned long pa, l4_pgentry_t l4e,
+ struct domain_mmap_cache *cache);
#endif
+
+#endif
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/time.h Fri Jul 29 20:25:03 2005
@@ -4,4 +4,7 @@
extern int timer_ack;
+extern void calibrate_tsc_bp(void);
+extern void calibrate_tsc_ap(void);
+
#endif /* __X86_TIME_H__ */
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/types.h
--- a/xen/include/asm-x86/types.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/types.h Fri Jul 29 20:25:03 2005
@@ -1,10 +1,9 @@
#ifndef __X86_TYPES_H__
#define __X86_TYPES_H__
-/*
- * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
- * header files exported to user space
- */
+#ifndef __ASSEMBLY__
+
+#include <xen/config.h>
typedef __signed__ char __s8;
typedef unsigned char __u8;
@@ -25,8 +24,6 @@
#endif
#endif
-#include <xen/config.h>
-
typedef signed char s8;
typedef unsigned char u8;
@@ -39,9 +36,6 @@
#if defined(__i386__)
typedef signed long long s64;
typedef unsigned long long u64;
-#define BITS_PER_LONG 32
-#define BYTES_PER_LONG 4
-#define LONG_BYTEORDER 2
#if defined(CONFIG_X86_PAE)
typedef u64 physaddr_t;
#else
@@ -50,12 +44,21 @@
#elif defined(__x86_64__)
typedef signed long s64;
typedef unsigned long u64;
-#define BITS_PER_LONG 64
-#define BYTES_PER_LONG 8
-#define LONG_BYTEORDER 3
typedef u64 physaddr_t;
#endif
typedef unsigned long size_t;
+#endif /* __ASSEMBLY__ */
+
+#if defined(__i386__)
+#define BITS_PER_LONG 32
+#define BYTES_PER_LONG 4
+#define LONG_BYTEORDER 2
+#elif defined(__x86_64__)
+#define BITS_PER_LONG 64
+#define BYTES_PER_LONG 8
+#define LONG_BYTEORDER 3
+#endif
+
#endif /* __X86_TYPES_H__ */
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/vmx.h Fri Jul 29 20:25:03 2005
@@ -61,6 +61,7 @@
CPU_BASED_INVDPG_EXITING | \
CPU_BASED_MWAIT_EXITING | \
CPU_BASED_MOV_DR_EXITING | \
+ CPU_BASED_ACTIVATE_IO_BITMAP | \
CPU_BASED_UNCOND_IO_EXITING \
)
@@ -447,4 +448,8 @@
return get_sp(d)->sp_global.eport;
}
+/* Prototypes */
+void load_cpu_user_regs(struct cpu_user_regs *regs);
+void store_cpu_user_regs(struct cpu_user_regs *regs);
+
#endif /* __ASM_X86_VMX_H__ */
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/vmx_virpit.h
--- a/xen/include/asm-x86/vmx_virpit.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/vmx_virpit.h Fri Jul 29 20:25:03 2005
@@ -19,6 +19,7 @@
/* for simulation of counter 0 in mode 2*/
int vector; /* the pit irq vector */
unsigned int period; /* the frequency. e.g. 10ms*/
+ s_time_t scheduled; /* scheduled timer interrupt */
unsigned int channel; /* the pit channel, counter 0~2 */
u64 *intr_bitmap;
    unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/asm-x86/vmx_vmcs.h Fri Jul 29 20:25:03 2005
@@ -69,6 +69,8 @@
unsigned long shadow_gs;
};
+#define PC_DEBUG_PORT 0x80
+
struct arch_vmx_struct {
struct vmcs_struct *vmcs; /* VMCS pointer in virtual */
unsigned long flags; /* VMCS flags */
@@ -76,6 +78,7 @@
unsigned long cpu_cr3;
unsigned long cpu_state;
struct msr_state msr_content;
+ void *io_bitmap_a, *io_bitmap_b;
};
#define vmx_schedule_tail(next) \
@@ -97,6 +100,8 @@
int store_vmcs(struct arch_vmx_struct *, u64);
int construct_vmcs(struct arch_vmx_struct *, struct cpu_user_regs *,
struct vcpu_guest_context *, int);
+int modify_vmcs(struct arch_vmx_struct *arch_vmx,
+ struct cpu_user_regs *regs);
#define VMCS_USE_HOST_ENV 1
#define VMCS_USE_SEPARATE_ENV 0
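With CPU_BASED_ACTIVATE_IO_BITMAP enabled (see the vmx.h change above), an I/O port traps to the VMM only when its bit is set in the two 4KB bitmaps: io_bitmap_a covers ports 0x0000-0x7fff and io_bitmap_b covers 0x8000-0xffff. A sketch of punching a hole for a single port such as PC_DEBUG_PORT (illustrative only; the helper name is hypothetical and this is not code from the changeset):

static void vmx_allow_direct_io(struct arch_vmx_struct *arch_vmx, u16 port)
{
    /* a clear bit means "do not intercept accesses to this port" */
    if ( port < 0x8000 )
        clear_bit(port, arch_vmx->io_bitmap_a);
    else
        clear_bit(port - 0x8000, arch_vmx->io_bitmap_b);
}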
diff -r a4196568095c -r b53a65034532 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/public/grant_table.h Fri Jul 29 20:25:03 2005
@@ -213,6 +213,19 @@
s16 status; /* GNTST_* */
} gnttab_dump_table_t;
+/*
+ * GNTTABOP_donate_grant_ref: Donate <frame> to a foreign domain. The
+ * foreign domain has previously registered the details of the transfer.
+ * These can be identified from <handle>, a grant reference.
+ */
+#define GNTTABOP_donate 4
+typedef struct {
+ memory_t mfn; /* 0 */
+ domid_t domid; /* 4 */
+ u16 handle; /* 8 */
+ s16 status; /* 10: GNTST_* */
+ u32 __pad;
+} gnttab_donate_t; /* 14 bytes */
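A guest would fill in the structure and pass it to the grant-table hypercall. A minimal sketch, assuming the conventional HYPERVISOR_grant_table_op(cmd, uop, count) wrapper (the donate_frame helper itself is hypothetical):

static int donate_frame(unsigned long mfn, domid_t remote_domid, u16 handle)
{
    gnttab_donate_t op = {
        .mfn    = mfn,           /* frame being given away            */
        .domid  = remote_domid,  /* foreign domain that receives it   */
        .handle = handle,        /* grant reference it registered     */
    };

    if (HYPERVISOR_grant_table_op(GNTTABOP_donate, &op, 1) != 0)
        return -1;               /* hypercall itself failed           */
    return op.status;            /* GNTST_* result reported by Xen    */
}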
/*
* Bitfield values for update_pin_status.flags.
diff -r a4196568095c -r b53a65034532 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/public/io/netif.h Fri Jul 29 20:25:03 2005
@@ -23,10 +23,17 @@
typedef struct {
u16 id; /* Echoed in response message. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_ref_t gref; /* 2: Reference to incoming granted frame */
+#endif
} netif_rx_request_t;
typedef struct {
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ u32 addr; /* 0: Offset in page of start of received packet */
+#else
memory_t addr; /* Machine address of packet. */
+#endif
u16 csum_valid:1; /* Protocol checksum is validated? */
u16 id:15;
s16 status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
diff -r a4196568095c -r b53a65034532 xen/include/public/xen.h
--- a/xen/include/public/xen.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/public/xen.h Fri Jul 29 20:25:03 2005
@@ -329,12 +329,36 @@
#endif
} vcpu_info_t;
+typedef struct vcpu_time_info {
+ /*
+ * The following values are updated periodically (and not necessarily
+ * atomically!). The guest OS detects this because 'time_version1' is
+ * incremented just before updating these values, and 'time_version2' is
+ * incremented immediately after. See the Xen-specific Linux code for an
+ * example of how to read these values safely (arch/xen/kernel/time.c).
+ */
+ u32 time_version1;
+ u32 time_version2;
+ u64 tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_time; /* Time, in nanosecs, since boot. */
+ /*
+ * Current system time:
+ * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul
+ * CPU frequency (Hz):
+ * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+ */
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+} vcpu_time_info_t;
+
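The version pair implements a lock-free snapshot: the producer bumps time_version1 before an update and time_version2 after it, so a reader copies the payload between reads of the two counters and retries on a mismatch. An illustrative guest-side reader, treating tsc_to_system_mul as a 32.32 fixed-point nanoseconds-per-cycle multiplier as the frequency formula implies (a sketch only; rdtscll() and rmb() are assumed helpers, and a production reader would use a widening multiply to avoid overflow of the 64x32 product):

static u64 read_system_time_ns(volatile vcpu_time_info_t *t)
{
    u32 v_after, v_before, mul;
    u64 base, stamp, tsc, delta;
    s8 shift;

    do {
        v_after = t->time_version2;      /* incremented after an update  */
        rmb();
        base  = t->system_time;
        stamp = t->tsc_timestamp;
        mul   = t->tsc_to_system_mul;
        shift = t->tsc_shift;
        rmb();
        v_before = t->time_version1;     /* incremented before an update */
    } while (v_after != v_before);       /* an update raced us: retry    */

    rdtscll(tsc);
    delta = tsc - stamp;
    delta = (shift >= 0) ? (delta << shift) : (delta >> -shift);
    return base + ((delta * mul) >> 32); /* scale by the 32.32 multiplier */
}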
/*
* Xen/kernel shared data -- pointer provided in start_info.
* NB. We expect that this struct is smaller than a page.
*/
typedef struct shared_info {
vcpu_info_t vcpu_data[MAX_VIRT_CPUS];
+
+ vcpu_time_info_t vcpu_time[MAX_VIRT_CPUS];
u32 n_vcpu;
@@ -373,33 +397,11 @@
u32 evtchn_mask[32];
/*
- * Time: The following abstractions are exposed: System Time, Clock Time,
- * Domain Virtual Time. Domains can access Cycle counter time directly.
+ * Wallclock time: updated only by control software. Guests should base
+ * their gettimeofday() syscall on this wallclock-base value.
*/
- u64 cpu_freq; /* CPU frequency (Hz). */
-
- /*
- * The following values are updated periodically (and not necessarily
- * atomically!). The guest OS detects this because 'time_version1' is
- * incremented just before updating these values, and 'time_version2' is
- * incremented immediately after. See the Xen-specific Linux code for an
- * example of how to read these values safely (arch/xen/kernel/time.c).
- */
- u32 time_version1;
- u32 time_version2;
- tsc_timestamp_t tsc_timestamp; /* TSC at last update of time vals. */
- u64 system_time; /* Time, in nanosecs, since boot. */
u32 wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
u32 wc_usec; /* Usecs 00:00:00 UTC, Jan 1, 1970. */
- u64 domain_time; /* Domain virtual time, in nanosecs. */
-
- /*
- * Timeout values:
- * Allow a domain to specify a timeout value in system time and
- * domain virtual time.
- */
- u64 wall_timeout;
- u64 domain_timeout;
arch_shared_info_t arch;
@@ -444,7 +446,7 @@
memory_t mod_start; /* VIRTUAL address of pre-loaded module. */
memory_t mod_len; /* Size (bytes) of pre-loaded module. */
s8 cmd_line[MAX_GUEST_CMDLINE];
- memory_t store_page; /* VIRTUAL address of store page. */
+ memory_t store_mfn; /* MACHINE page number of shared page. */
u16 store_evtchn; /* Event channel for store communication. */
} start_info_t;
diff -r a4196568095c -r b53a65034532 xen/include/xen/mm.h
--- a/xen/include/xen/mm.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/xen/mm.h Fri Jul 29 20:25:03 2005
@@ -33,11 +33,14 @@
/* Domain suballocator. These functions are *not* interrupt-safe.*/
void init_domheap_pages(physaddr_t ps, physaddr_t pe);
-struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order);
+struct pfn_info *alloc_domheap_pages(
+ struct domain *d, unsigned int order, unsigned int flags);
void free_domheap_pages(struct pfn_info *pg, unsigned int order);
unsigned long avail_domheap_pages(void);
-#define alloc_domheap_page(d) (alloc_domheap_pages(d,0))
+#define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
#define free_domheap_page(p) (free_domheap_pages(p,0))
+
+#define ALLOC_DOM_DMA 1
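The new third argument selects the pool: 0 preserves the old behaviour and ALLOC_DOM_DMA asks for memory usable by legacy DMA devices, while the alloc_domheap_page() wrapper above simply passes 0. A brief, hypothetical caller:

static struct pfn_info *alloc_one_page(struct domain *d, int need_low_mem)
{
    /* order 0 == a single page; the flag selects the DMA pool if needed */
    return alloc_domheap_pages(d, 0, need_low_mem ? ALLOC_DOM_DMA : 0);
}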
/* Automatic page scrubbing for dead domains. */
extern struct list_head page_scrub_list;
diff -r a4196568095c -r b53a65034532 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/xen/sched.h Fri Jul 29 20:25:03 2005
@@ -92,7 +92,6 @@
domid_t domain_id;
shared_info_t *shared_info; /* shared data area */
- spinlock_t time_lock;
spinlock_t big_lock;
diff -r a4196568095c -r b53a65034532 xen/include/xen/time.h
--- a/xen/include/xen/time.h Fri Jul 29 18:52:33 2005
+++ b/xen/include/xen/time.h Fri Jul 29 20:25:03 2005
@@ -30,7 +30,8 @@
#include <public/xen.h>
#include <asm/time.h>
-extern int init_xen_time();
+extern int init_xen_time(void);
+extern void init_percpu_time(void);
extern unsigned long cpu_khz;
diff -r a4196568095c -r b53a65034532 xen/tools/Makefile
--- a/xen/tools/Makefile Fri Jul 29 18:52:33 2005
+++ b/xen/tools/Makefile Fri Jul 29 20:25:03 2005
@@ -1,6 +1,13 @@
+
+include $(BASEDIR)/../Config.mk
default:
$(MAKE) -C figlet
+ $(MAKE) symbols
clean:
- $(MAKE) -C figlet clean
\ No newline at end of file
+ $(MAKE) -C figlet clean
+ rm -f *.o symbols
+
+symbols: symbols.c
+ $(HOSTCC) -o $@ $<
diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/kernel/ptrace.c
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/linux-2.6-xen-sparse/kernel/ptrace.c Fri Jul 29 20:25:03 2005
@@ -0,0 +1,391 @@
+/*
+ * linux/kernel/ptrace.c
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Common interfaces for "ptrace()" which we do not want
+ * to continually duplicate across every architecture.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/ptrace.h>
+#include <linux/security.h>
+#include <linux/signal.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+
+/*
+ * ptrace a task: make the debugger its new parent and
+ * move it to the ptrace list.
+ *
+ * Must be called with the tasklist lock write-held.
+ */
+void __ptrace_link(task_t *child, task_t *new_parent)
+{
+ if (!list_empty(&child->ptrace_list))
+ BUG();
+ if (child->parent == new_parent)
+ return;
+ list_add(&child->ptrace_list, &child->parent->ptrace_children);
+ REMOVE_LINKS(child);
+ child->parent = new_parent;
+ SET_LINKS(child);
+}
+
+/*
+ * Turn a tracing stop into a normal stop now, since with no tracer there
+ * would be no way to wake it up with SIGCONT or SIGKILL. If there was a
+ * signal sent that would resume the child, but didn't because it was in
+ * TASK_TRACED, resume it now.
+ * Requires that irqs be disabled.
+ */
+void ptrace_untrace(task_t *child)
+{
+ spin_lock(&child->sighand->siglock);
+ if (child->state == TASK_TRACED) {
+ if (child->signal->flags & SIGNAL_STOP_STOPPED) {
+ child->state = TASK_STOPPED;
+ } else {
+ signal_wake_up(child, 1);
+ }
+ }
+ spin_unlock(&child->sighand->siglock);
+}
+
+/*
+ * unptrace a task: move it back to its original parent and
+ * remove it from the ptrace list.
+ *
+ * Must be called with the tasklist lock write-held.
+ */
+void __ptrace_unlink(task_t *child)
+{
+ if (!child->ptrace)
+ BUG();
+ child->ptrace = 0;
+ if (!list_empty(&child->ptrace_list)) {
+ list_del_init(&child->ptrace_list);
+ REMOVE_LINKS(child);
+ child->parent = child->real_parent;
+ SET_LINKS(child);
+ }
+
+ if (child->state == TASK_TRACED)
+ ptrace_untrace(child);
+}
+
+/*
+ * Check that we have indeed attached to the thing..
+ */
+int ptrace_check_attach(struct task_struct *child, int kill)
+{
+ int ret = -ESRCH;
+
+ /*
+ * We take the read lock around doing both checks to close a
+ * possible race where someone else was tracing our child and
+ * detached between these two checks. After this locked check,
+ * we are sure that this is our traced child and that can only
+ * be changed by us so it's not changing right after this.
+ */
+ read_lock(&tasklist_lock);
+ if ((child->ptrace & PT_PTRACED) && child->parent == current &&
+ (!(child->ptrace & PT_ATTACHED) || child->real_parent != current)
+ && child->signal != NULL) {
+ ret = 0;
+ spin_lock_irq(&child->sighand->siglock);
+ if (child->state == TASK_STOPPED) {
+ child->state = TASK_TRACED;
+ } else if (child->state != TASK_TRACED && !kill) {
+ ret = -ESRCH;
+ }
+ spin_unlock_irq(&child->sighand->siglock);
+ }
+ read_unlock(&tasklist_lock);
+
+ if (!ret && !kill) {
+ wait_task_inactive(child);
+ }
+
+ /* All systems go.. */
+ return ret;
+}
+
+int ptrace_attach(struct task_struct *task)
+{
+ int retval;
+ task_lock(task);
+ retval = -EPERM;
+ if (task->pid <= 1)
+ goto bad;
+ if (task == current)
+ goto bad;
+ if (!task->mm)
+ goto bad;
+ if(((current->uid != task->euid) ||
+ (current->uid != task->suid) ||
+ (current->uid != task->uid) ||
+ (current->gid != task->egid) ||
+ (current->gid != task->sgid) ||
+ (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
+ goto bad;
+ smp_rmb();
+ if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+ goto bad;
+ /* the same process cannot be attached many times */
+ if (task->ptrace & PT_PTRACED)
+ goto bad;
+ retval = security_ptrace(current, task);
+ if (retval)
+ goto bad;
+
+ /* Go */
+ task->ptrace |= PT_PTRACED | ((task->real_parent != current)
+ ? PT_ATTACHED : 0);
+ if (capable(CAP_SYS_PTRACE))
+ task->ptrace |= PT_PTRACE_CAP;
+ task_unlock(task);
+
+ write_lock_irq(&tasklist_lock);
+ __ptrace_link(task, current);
+ write_unlock_irq(&tasklist_lock);
+
+ force_sig_specific(SIGSTOP, task);
+ return 0;
+
+bad:
+ task_unlock(task);
+ return retval;
+}
+
+int ptrace_detach(struct task_struct *child, unsigned int data)
+{
+ if (!valid_signal(data))
+ return -EIO;
+
+ /* Architecture-specific hardware disable .. */
+ ptrace_disable(child);
+
+ /* .. re-parent .. */
+ child->exit_code = data;
+
+ write_lock_irq(&tasklist_lock);
+ __ptrace_unlink(child);
+ /* .. and wake it up. */
+ if (child->exit_state != EXIT_ZOMBIE)
+ wake_up_process(child);
+ write_unlock_irq(&tasklist_lock);
+
+ return 0;
+}
+
+/*
+ * Access another process' address space.
+ * Source/target buffer must be kernel space,
+ * Do not walk the page table directly, use get_user_pages
+ */
+
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ struct page *page;
+ void *old_buf = buf;
+
+ mm = get_task_mm(tsk);
+ if (!mm)
+ return 0;
+
+ down_read(&mm->mmap_sem);
+        /* ignore errors, just check how much was successfully transferred */
+ while (len) {
+ int bytes, ret, offset;
+ void *maddr;
+
+ ret = get_user_pages(tsk, mm, addr, 1,
+ write, 1, &page, &vma);
+ if (ret <= 0)
+ break;
+
+ bytes = len;
+ offset = addr & (PAGE_SIZE-1);
+ if (bytes > PAGE_SIZE-offset)
+ bytes = PAGE_SIZE-offset;
+
+ maddr = kmap(page);
+ if (write) {
+ copy_to_user_page(vma, page, addr,
+ maddr + offset, buf, bytes);
+ set_page_dirty_lock(page);
+ } else {
+ copy_from_user_page(vma, page, addr,
+ buf, maddr + offset, bytes);
+ }
+ kunmap(page);
+ page_cache_release(page);
+ len -= bytes;
+ buf += bytes;
+ addr += bytes;
+ }
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+
+ return buf - old_buf;
+}
+EXPORT_SYMBOL(access_process_vm);
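The helper returns the number of bytes actually transferred rather than an error code, so callers check for short copies. An illustrative in-kernel user (the peek_word helper is hypothetical):

static int peek_word(struct task_struct *child, unsigned long addr,
                     unsigned long *val)
{
    /* read one word from the stopped child's address space */
    int copied = access_process_vm(child, addr, val, sizeof(*val), 0);

    return (copied == sizeof(*val)) ? 0 : -EIO;
}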
+
+int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
+{
+ int copied = 0;
+
+ while (len > 0) {
+ char buf[128];
+ int this_len, retval;
+
+ this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
+ retval = access_process_vm(tsk, src, buf, this_len, 0);
+ if (!retval) {
+ if (copied)
+ break;
+ return -EIO;
+ }
+ if (copy_to_user(dst, buf, retval))
+ return -EFAULT;
+ copied += retval;
+ src += retval;
+ dst += retval;
+ len -= retval;
+ }
+ return copied;
+}
+
+int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len)
+{
+ int copied = 0;
+
+ while (len > 0) {
+ char buf[128];
+ int this_len, retval;
+
+ this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
+ if (copy_from_user(buf, src, this_len))
+ return -EFAULT;
+ retval = access_process_vm(tsk, dst, buf, this_len, 1);
+ if (!retval) {
+ if (copied)
+ break;
+ return -EIO;
+ }
+ copied += retval;
+ src += retval;
+ dst += retval;
+ len -= retval;
+ }
+ return copied;
+}
+
+static int ptrace_setoptions(struct task_struct *child, long data)
+{
+ child->ptrace &= ~PT_TRACE_MASK;
+
+ if (data & PTRACE_O_TRACESYSGOOD)
+ child->ptrace |= PT_TRACESYSGOOD;
+
+ if (data & PTRACE_O_TRACEFORK)
+ child->ptrace |= PT_TRACE_FORK;
+
+ if (data & PTRACE_O_TRACEVFORK)
+ child->ptrace |= PT_TRACE_VFORK;
+
+ if (data & PTRACE_O_TRACECLONE)
+ child->ptrace |= PT_TRACE_CLONE;
+
+ if (data & PTRACE_O_TRACEEXEC)
+ child->ptrace |= PT_TRACE_EXEC;
+
+ if (data & PTRACE_O_TRACEVFORKDONE)
+ child->ptrace |= PT_TRACE_VFORK_DONE;
+
+ if (data & PTRACE_O_TRACEEXIT)
+ child->ptrace |= PT_TRACE_EXIT;
+
+ return (data & ~PTRACE_O_MASK) ? -EINVAL : 0;
+}
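On the tracer side these bits are requested with a plain ptrace(2) call once the child has stopped; roughly (userspace sketch, and the header providing the PTRACE_O_* constants varies between libc versions):

#include <sys/ptrace.h>
#include <sys/types.h>

static long trace_child_lifecycle(pid_t child)
{
    /* ask for distinct trap reasons on fork/clone/exec/exit */
    long opts = PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
                PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                PTRACE_O_TRACEEXIT;

    return ptrace(PTRACE_SETOPTIONS, child, 0, opts);
}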
+
+static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data)
+{
+ siginfo_t lastinfo;
+ int error = -ESRCH;
+
+ read_lock(&tasklist_lock);
+ if (likely(child->sighand != NULL)) {
+ error = -EINVAL;
+ spin_lock_irq(&child->sighand->siglock);
+ if (likely(child->last_siginfo != NULL)) {
+ lastinfo = *child->last_siginfo;
+ error = 0;
+ }
+ spin_unlock_irq(&child->sighand->siglock);
+ }
+ read_unlock(&tasklist_lock);
+ if (!error)
+ return copy_siginfo_to_user(data, &lastinfo);
+ return error;
+}
+
+static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
+{
+ siginfo_t newinfo;
+ int error = -ESRCH;
+
+ if (copy_from_user(&newinfo, data, sizeof (siginfo_t)))
+ return -EFAULT;
+
+ read_lock(&tasklist_lock);
+ if (likely(child->sighand != NULL)) {
+ error = -EINVAL;
+ spin_lock_irq(&child->sighand->siglock);
+ if (likely(child->last_siginfo != NULL)) {
+ *child->last_siginfo = newinfo;
+ error = 0;
+ }
+ spin_unlock_irq(&child->sighand->siglock);
+ }
+ read_unlock(&tasklist_lock);
+ return error;
+}
+
+int ptrace_request(struct task_struct *child, long request,
+ long addr, long data)
+{
+ int ret = -EIO;
+
+ switch (request) {
+#ifdef PTRACE_OLDSETOPTIONS
+ case PTRACE_OLDSETOPTIONS:
+#endif
+ case PTRACE_SETOPTIONS:
+ ret = ptrace_setoptions(child, data);
+ break;
+ case PTRACE_GETEVENTMSG:
+                ret = put_user(child->ptrace_message, (unsigned long __user *) data);
+ break;
+ case PTRACE_GETSIGINFO:
+ ret = ptrace_getsiginfo(child, (siginfo_t __user *) data);
+ break;
+ case PTRACE_SETSIGINFO:
+ ret = ptrace_setsiginfo(child, (siginfo_t __user *) data);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/pdb_debug.h
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Fri Jul 29 20:25:03 2005
@@ -0,0 +1,46 @@
+
+#ifndef __PDB_DEBUG_H_
+#define __PDB_DEBUG_H_
+
+/* debugger.c */
+void pdb_initialize_bwcpoint (void);
+int pdb_suspend (struct task_struct *target);
+int pdb_resume (struct task_struct *target);
+int pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op);
+int pdb_write_register (struct task_struct *target, pdb_op_wr_reg_p op);
+int pdb_read_memory (struct task_struct *target, pdb_op_rd_mem_req_p req,
+ pdb_op_rd_mem_resp_p resp);
+int pdb_write_memory (struct task_struct *target, pdb_op_wr_mem_p op);
+int pdb_access_memory (struct task_struct *target, unsigned long address,
+ void *buffer, int length, int write);
+int pdb_continue (struct task_struct *target);
+int pdb_step (struct task_struct *target);
+
+int pdb_insert_memory_breakpoint (struct task_struct *target,
+ memory_t address, u32 length);
+int pdb_remove_memory_breakpoint (struct task_struct *target,
+ memory_t address, u32 length);
+
+int pdb_exceptions_notify (struct notifier_block *self, unsigned long val,
+ void *data);
+
+int pdb_debug_fn (struct pt_regs *regs, long error_code,
+ unsigned int condition);
+int pdb_int3_fn (struct pt_regs *regs, long error_code);
+
+/* module.c */
+void pdb_send_response (pdb_response_t *response);
+
+#endif
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/CreateDomain.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/CreateDomain.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,163 @@
+from xen.sv.Wizard import *
+from xen.sv.util import *
+from xen.sv.GenTabbed import PreTab
+
+from xen.xm.create import make_config, OptVals
+
+from xen.xend.XendClient import server
+
+class CreateDomain( Wizard ):
+ def __init__( self, urlWriter ):
+
+ sheets = [ CreatePage0,
+ CreatePage1,
+ CreatePage2,
+ CreatePage3,
+ CreatePage4,
+ CreateFinish ]
+
+ Wizard.__init__( self, urlWriter, "Create Domain", sheets )
+
+class CreatePage0( Sheet ):
+
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "General", 0 )
+        self.addControl( InputControl( 'name', 'VM Name', 'VM Name:', "[\\w|\\S]+", "You must enter a name in this field" ) )
+        self.addControl( InputControl( 'memory', '64', 'Memory (Mb):', "[\\d]+", "You must enter a number in this field" ) )
+        self.addControl( InputControl( 'cpu', '0', 'CPU:', "[\\d]+", "You must enter a number in this field" ) )
+        self.addControl( InputControl( 'cpu_weight', '1', 'CPU Weight:', "[\\d]+", "You must enter a number in this field" ) )
+
+class CreatePage1( Sheet ):
+
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "Setup Kernel Image", 1 )
+# For now we don't need to select a builder...
+#        self.addControl( ListControl( 'builder', [('linux', 'Linux'), ('netbsd', 'NetBSD')], 'Kernel Type:' ) )
+        self.addControl( FileControl( 'kernel', '/boot/vmlinuz-2.6.9-xenU', 'Kernel Image:' ) )
+        self.addControl( InputControl( 'extra', '', 'Kernel Command Line Parameters:' ) )
+
+class CreatePage2( Sheet ):
+
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 2 )
+        self.addControl( InputControl( 'num_vbds', '1', 'Number of VBDs:', '[\\d]+', "You must enter a number in this field" ) )
+
+class CreatePage3( Sheet ):
+
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 3 )
+
+ def write_BODY( self, request, err ):
+ if not self.passback: self.parseForm( request )
+
+        previous_values = sxp2hash( string2sxp( self.passback ) ) #get the hash for quick reference
+
+ num_vbds = previous_values.get( 'num_vbds' )
+
+ for i in range( int( num_vbds ) ):
+            self.addControl( InputControl( 'vbd%s_dom0' % i, 'phy:sda%s' % str(i + 1), 'Device %s name:' % i ) )
+            self.addControl( InputControl( 'vbd%s_domU' % i, 'sda%s' % str(i + 1), 'Virtualized device %s:' % i ) )
+            self.addControl( ListControl( 'vbd%s_mode' % i, [('w', 'Read + Write'), ('r', 'Read Only')], 'Device %s mode:' % i ) )
+
+        self.addControl( InputControl( 'root', '/dev/sda1', 'Root device (in VM):' ) )
+
+ Sheet.write_BODY( self, request, err )
+
+class CreatePage4( Sheet ):
+
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "Network settings", 4 )
+        self.addControl( ListControl( 'dhcp', [('off', 'No'), ('dhcp', 'Yes')], 'Use DHCP:' ) )
+        self.addControl( InputControl( 'hostname', 'hostname', 'VM Hostname:' ) )
+        self.addControl( InputControl( 'ip_addr', '1.2.3.4', 'VM IP Address:' ) )
+        self.addControl( InputControl( 'ip_subnet', '255.255.255.0', 'VM Subnet Mask:' ) )
+        self.addControl( InputControl( 'ip_gateway', '1.2.3.4', 'VM Gateway:' ) )
+ self.addControl( InputControl( 'ip_nfs', '1.2.3.4', 'NFS Server:' ) )
+
+class CreateFinish( Sheet ):
+
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "All Done", 5 )
+
+ def write_BODY( self, request, err ):
+
+ if not self.passback: self.parseForm( request )
+
+ xend_sxp = self.translate_sxp( string2sxp( self.passback ) )
+
+ try:
+ dom_sxp = server.xend_domain_create( xend_sxp )
+ success = "Your domain was successfully created.\n"
+ except:
+            success = "There was an error creating your domain.\nThe configuration used is as follows:\n"
+ dom_sxp = xend_sxp
+
+
+
+ pt = PreTab( success + sxp2prettystring( dom_sxp ) )
+ pt.write_BODY( request )
+
+        request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback )
+        request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location )
+
+ def translate_sxp( self, fin_sxp ):
+ fin_hash = ssxp2hash( fin_sxp )
+
+ def get( key ):
+ ret = fin_hash.get( key )
+ if ret:
+ return ret
+ else:
+ return ""
+
+ vals = OptVals()
+
+ vals.name = get( 'name' )
+ vals.memory = get( 'memory' )
+ vals.maxmem = get( 'maxmem' )
+ vals.cpu = get( 'cpu' )
+ vals.cpu_weight = get( 'cpu_weight' )
+
+ vals.builder = get( 'builder' )
+ vals.kernel = get( 'kernel' )
+ vals.root = get( 'root' )
+ vals.extra = get( 'extra' )
+
+ #setup vbds
+
+ vbds = []
+
+ for i in range( int( get( 'num_vbds' ) ) ):
+            vbds.append( ( get( 'vbd%s_dom0' % i ), get('vbd%s_domU' % i ), get( 'vbd%s_mode' % i ) ) )
+
+ vals.disk = vbds
+
+ #misc
+
+ vals.pci = []
+
+ vals.blkif = None
+ vals.netif = None
+ vals.restart = None
+ vals.console = None
+ vals.ramdisk = None
+
+ #setup vifs
+
+ vals.vif = []
+ vals.nics = 1
+
+ ip = get( 'ip_addr' )
+ nfs = get( 'ip_nfs' )
+ gate = get( 'ip_gateway' )
+ mask = get( 'ip_subnet' )
+ host = get( 'hostname' )
+ dhcp = get( 'dhcp' )
+
+        vals.cmdline_ip = "%s:%s:%s:%s:%s:eth0:%s" % (ip, nfs, gate, mask, host, dhcp)
+
+ try:
+ return make_config( vals )
+ except:
+ return [["Error creating domain config."]]
+
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/Daemon.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/Daemon.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,110 @@
+###########################################################
+## XenSV Web Control Interface Daemon
+## Copyright (C) 2004, K A Fraser (University of Cambridge)
+## Copyright (C) 2004, Mike Wray <mike.wray@xxxxxx>
+## Copyright (C) 2004, Tom Wilkie <tw275@xxxxxxxxx>
+###########################################################
+
+import os
+import os.path
+import sys
+import re
+
+from xen.sv.params import *
+
+from twisted.internet import reactor
+from twisted.web import static, server, script
+
+from xen.util.ip import _readline, _readlines
+
+class Daemon:
+ """The xend daemon.
+ """
+ def __init__(self):
+ self.shutdown = 0
+ self.traceon = 0
+
+ def daemon_pids(self):
+ pids = []
+ pidex = '(?P<pid>\d+)'
+ pythonex = '(?P<python>\S*python\S*)'
+ cmdex = '(?P<cmd>.*)'
+        procre = re.compile('^\s*' + pidex + '\s*' + pythonex + '\s*' + cmdex + '$')
+ xendre = re.compile('^/usr/sbin/xend\s*(start|restart)\s*.*$')
+ procs = os.popen('ps -e -o pid,args 2>/dev/null')
+ for proc in procs:
+ pm = procre.match(proc)
+ if not pm: continue
+ xm = xendre.match(pm.group('cmd'))
+ if not xm: continue
+ #print 'pid=', pm.group('pid'), 'cmd=', pm.group('cmd')
+ pids.append(int(pm.group('pid')))
+ return pids
+
+ def new_cleanup(self, kill=0):
+ err = 0
+ pids = self.daemon_pids()
+ if kill:
+ for pid in pids:
+ print "Killing daemon pid=%d" % pid
+ os.kill(pid, signal.SIGHUP)
+ elif pids:
+ err = 1
+ print "Daemon already running: ", pids
+ return err
+
+ def cleanup(self, kill=False):
+ # No cleanup to do if PID_FILE is empty.
+ if not os.path.isfile(PID_FILE) or not os.path.getsize(PID_FILE):
+ return 0
+        # Read the pid of the previous invocation and search active process list.
+ pid = open(PID_FILE, 'r').read()
+ lines = _readlines(os.popen('ps ' + pid + ' 2>/dev/null'))
+ for line in lines:
+ if re.search('^ *' + pid + '.+xensv', line):
+ if not kill:
+ print "Daemon is already running (pid %d)" % int(pid)
+ return 1
+ # Old daemon is still active: terminate it.
+ os.kill(int(pid), 1)
+ # Delete the stale PID_FILE.
+ os.remove(PID_FILE)
+ return 0
+
+ def start(self, trace=0):
+ if self.cleanup(kill=False):
+ return 1
+
+ # Fork -- parent writes PID_FILE and exits.
+ pid = os.fork()
+ if pid:
+ # Parent
+ pidfile = open(PID_FILE, 'w')
+ pidfile.write(str(pid))
+ pidfile.close()
+ return 0
+ # Child
+ self.run()
+ return 0
+
+ def stop(self):
+ return self.cleanup(kill=True)
+
+ def run(self):
+ root = static.File( SV_ROOT )
+ root.indexNames = [ 'Main.rpy' ]
+ root.processors = { '.rpy': script.ResourceScript }
+ reactor.listenTCP( SV_PORT, server.Site( root ) )
+ reactor.run()
+
+ def exit(self):
+ reactor.disconnectAll()
+ sys.exit(0)
+
+def instance():
+ global inst
+ try:
+ inst
+ except:
+ inst = Daemon()
+ return inst
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/DomInfo.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/DomInfo.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,148 @@
+from xen.xend.XendClient import server
+from xen.xend import PrettyPrint
+
+from xen.sv.HTMLBase import HTMLBase
+from xen.sv.util import *
+from xen.sv.GenTabbed import *
+
+DEBUG=1
+
+class DomInfo( GenTabbed ):
+
+ def __init__( self, urlWriter ):
+
+ self.dom = 0;
+
+ def tabUrlWriter( tab ):
+ return urlWriter( "&dom=%s%s" % ( self.dom, tab ) )
+
+        GenTabbed.__init__( self, "Domain Info", tabUrlWriter, [ 'General', 'SXP', 'Devices' ], [ DomGeneralTab, DomSXPTab, NullTab ] )
+
+ def write_BODY( self, request ):
+ dom = request.args.get('dom')
+
+ if dom is None or len(dom) != 1:
+ request.write( "<p>Please Select a Domain</p>" )
+ return None
+ else:
+ self.dom = dom[0]
+
+ GenTabbed.write_BODY( self, request )
+
+ def write_MENU( self, request ):
+ pass
+
+class DomGeneralTab( CompositeTab ):
+ def __init__( self ):
+ CompositeTab.__init__( self, [ DomGenTab, DomActionTab ] )
+
+class DomGenTab( GeneralTab ):
+
+ def __init__( self ):
+
+ titles = {}
+
+ titles[ 'ID' ] = 'dom'
+ titles[ 'Name' ] = 'name'
+ titles[ 'CPU' ] = 'cpu'
+ titles[ 'Memory' ] = ( 'mem', memoryFormatter )
+ titles[ 'State' ] = ( 'state', stateFormatter )
+ titles[ 'Total CPU' ] = ( 'cpu_time', smallTimeFormatter )
+ titles[ 'Up Time' ] = ( 'up_time', bigTimeFormatter )
+
+ GeneralTab.__init__( self, {}, titles )
+
+ def write_BODY( self, request ):
+
+ self.dom = getVar('dom', request)
+
+ if self.dom is None:
+ request.write( "<p>Please Select a Domain</p>" )
+ return None
+
+ self.dict = getDomInfoHash( self.dom )
+
+ GeneralTab.write_BODY( self, request )
+
+class DomSXPTab( PreTab ):
+
+ def __init__( self ):
+ self.dom = 0
+ PreTab.__init__( self, "" )
+
+
+ def write_BODY( self, request ):
+ self.dom = getVar('dom', request)
+
+ if self.dom is None:
+ request.write( "<p>Please Select a Domain</p>" )
+ return None
+
+ try:
+ domInfo = server.xend_domain( self.dom )
+ except:
+ domInfo = [["Error getting domain details."]]
+
+ self.source = sxp2prettystring( domInfo )
+
+ PreTab.write_BODY( self, request )
+
+class DomActionTab( ActionTab ):
+
+ def __init__( self ):
+ actions = { "shutdown" : "shutdown",
+ "reboot" : "reboot",
+ "pause" : "pause",
+ "unpause" : "unpause",
+ "destroy" : "destroy" }
+ ActionTab.__init__( self, actions )
+
+ def op_shutdown( self, request ):
+ dom = getVar( 'dom', request )
+ if not dom is None and dom != '0':
+ if DEBUG: print ">DomShutDown %s" % dom
+ try:
+ server.xend_domain_shutdown( int( dom ), "halt" )
+ except:
+ pass
+
+ def op_reboot( self, request ):
+ dom = getVar( 'dom', request )
+ if not dom is None and dom != '0':
+ if DEBUG: print ">DomReboot %s" % dom
+ try:
+ server.xend_domain_shutdown( int( dom ), "reboot" )
+ except:
+ pass
+
+ def op_pause( self, request ):
+ dom = getVar( 'dom', request )
+ if not dom is None and dom != '0':
+ if DEBUG: print ">DomPause %s" % dom
+ try:
+ server.xend_domain_pause( int( dom ) )
+ except:
+ pass
+
+ def op_unpause( self, request ):
+ dom = getVar( 'dom', request )
+ if not dom is None and dom != '0':
+ if DEBUG: print ">DomUnpause %s" % dom
+ try:
+ server.xend_domain_unpause( int( dom ) )
+ except:
+ pass
+
+ def op_destroy( self, request ):
+ dom = getVar( 'dom', request )
+ if not dom is None and dom != '0':
+ if DEBUG: print ">DomDestroy %s" % dom
+ try:
+ server.xend_domain_destroy( int( dom ), "halt" )
+ except:
+ pass
+
+
+
+
+
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/DomList.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/DomList.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,81 @@
+from xen.xend.XendClient import server
+from xen.xend import sxp
+
+from xen.sv.HTMLBase import HTMLBase
+from xen.sv.util import *
+
+class DomList( HTMLBase ):
+
+ isLeaf = True
+
+ def __init__( self, urlWriter ):
+ HTMLBase.__init__(self)
+ self.urlWriter = urlWriter
+
+ def write_MENU( self, request ):
+ return self.write_BODY( request, head=True, long=False )
+
+ def write_BODY( self, request, head=True, long=True ):
+
+ domains = []
+
+ try:
+ domains = server.xend_domains()
+ domains.sort()
+ except:
+ pass
+
+        request.write( "\n<table style='border:0px solid white' cellspacing='0' cellpadding='0' border='0' width='100%'>\n" )
+
+ if head:
+ request.write( "<tr class='domainInfoHead'>" )
+ self.write_DOMAIN_HEAD( request, long )
+ request.write( "</tr>" )
+
+ odd = True
+
+ if not domains is None:
+ for domain in domains:
+ if odd:
+ request.write( "<tr class='domainInfoOdd'>\n" )
+ odd = False
+ else:
+ request.write( "<tr class='domainInfoEven'>\n" )
+ odd = True
+ self.write_DOMAIN( request, getDomInfoHash( domain ), long )
+ request.write( "</tr>\n" )
+ else:
+            request.write( "<tr colspan='10'><p class='small'>Error getting domain list<br/>Perhaps XenD not running?</p></tr>")
+
+ request.write( "</table>\n" )
+
+ def write_DOMAIN( self, request, domInfoHash, long=True ):
+        request.write( "<td class='domainInfo' align='center'>%(id)s</td>\n" % domInfoHash )
+
+        url = self.urlWriter( "&mod=info&dom=%(id)s" % domInfoHash )
+
+        request.write( "<td class='domainInfo' align='center'><a href='%s'>%s</a></td>\n" % ( url, domInfoHash['name'] ) )
+ if long:
+            request.write( "<td class='domainInfo' align='center'>%(memory)5s</td>\n" % domInfoHash )
+            request.write( "<td class='domainInfo' align='center'>%(cpu)2s</td>\n" % domInfoHash )
+            request.write( "<td class='domainInfo' align='center'>%(state)5s</td>\n" % domInfoHash )
+ if domInfoHash[ 'id' ] != "0":
+ request.write( "<td class='domainInfo' align='center'>" )
+
+ if domInfoHash[ 'state' ][ 2 ] == "-":
+                request.write( "<img src='images/small-pause.png' onclick='doOp2( \"pause\", \"%(dom)-4s\" )'>" % domInfoHash )
+            else:
+                request.write( "<img src='images/small-unpause.png' onclick='doOp2( \"unpause\", \"%(dom)-4s\" )'>" % domInfoHash )
+
+            request.write( "<img src='images/small-destroy.png' onclick='doOp2( \"destroy\", \"%(dom)-4s\" )'></td>" % domInfoHash)
+ else:
+ request.write( "<td> </td>" )
+
+ def write_DOMAIN_HEAD( self, request, long=True ):
+        request.write( "<td class='domainInfoHead' align='center'>Domain</td>\n" )
+        request.write( "<td class='domainInfoHead' align='center'>Name</td>\n" )
+        if long:
+            request.write( "<td class='domainInfoHead' align='center'>Memory / Mb</td>\n" )
+            request.write( "<td class='domainInfoHead' align='center'>CPU</td>\n" )
+            request.write( "<td class='domainInfoHead' align='center'>State</td>\n" )
+ request.write( "<td class='domainInfoHead' align='center'></td>\n" )
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/GenTabbed.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/GenTabbed.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,135 @@
+import types
+
+from xen.sv.HTMLBase import HTMLBase
+from xen.sv.TabView import TabView
+from xen.sv.util import getVar
+
+class GenTabbed( HTMLBase ):
+
+ def __init__( self, title, urlWriter, tabStrings, tabObjects ):
+ HTMLBase.__init__(self)
+ self.tabStrings = tabStrings
+ self.tabObjects = tabObjects
+ self.urlWriter = urlWriter
+ self.title = title
+
+ def write_BODY( self, request, urlWriter = None ):
+ try:
+ tab = int( getVar( 'tab', request, 0 ) )
+ except:
+ tab = 0
+
+        request.write( "<table style='' width='100%' border='0' cellspacing='0' cellpadding='0'>" )
+ request.write( "<tr><td>" )
+ request.write( "<p align='center'><u>%s</u></p>" % self.title )
+
+ TabView( tab, self.tabStrings, self.urlWriter ).write_BODY( request )
+
+ request.write( "</td></tr><tr><td>" )
+
+ try:
+ render_tab = self.tabObjects[ tab ]
+ render_tab().write_BODY( request )
+ except:
+ request.write( "<p>Error Rendering Tab</p>" )
+
+ request.write( "</td></tr></table>" )
+
+ def perform( self, request ):
+ try:
+ tab = int( getVar( 'tab', request, 0 ) )
+ except:
+ tab = 0;
+
+ op_tab = self.tabObjects[ tab ]
+
+ if op_tab:
+ op_tab().perform( request )
+
+class PreTab( HTMLBase ):
+
+ def __init__( self, source ):
+ HTMLBase.__init__( self )
+ self.source = source
+
+ def write_BODY( self, request ):
+
+        request.write( "<div style='display: block; overflow: auto; border: 0px solid black; width: 540px; padding: 5px; z-index:0; align: center'><pre>" )
+
+ request.write( self.source )
+
+ request.write( "</pre></div>" )
+
+class GeneralTab( HTMLBase ):
+
+ def __init__( self, dict, titles ):
+ HTMLBase.__init__( self )
+ self.dict = dict
+ self.titles = titles
+
+ def write_BODY( self, request ):
+
+        request.write( "<table width='100%' cellspacing='0' cellpadding='0' border='0'>" )
+
+ def writeAttr( niceName, attr, formatter=None ):
+ if type( attr ) is types.TupleType:
+ ( attr, formatter ) = attr
+
+ if attr in self.dict:
+ if formatter:
+ temp = formatter( self.dict[ attr ] )
+ else:
+ temp = str( self.dict[ attr ] )
+                request.write( "<tr><td width='50%%'><p>%s:</p></td><td width='50%%'><p>%s</p></td></tr>" % ( niceName, temp ) )
+
+ for niceName, attr in self.titles.items():
+ writeAttr( niceName, attr )
+
+ request.write( "</table>" )
+
+class NullTab( HTMLBase ):
+
+ def __init__( self ):
+ HTMLBase.__init__( self )
+ self.title = "Null Tab"
+
+ def __init__( self, title ):
+ HTMLBase.__init__( self )
+ self.title = title
+
+ def write_BODY( self, request ):
+ request.write( "<p>%s</p>" % self.title )
+
+class ActionTab( HTMLBase ):
+
+ def __init__( self, actions ):
+ self.actions = actions
+ HTMLBase.__init__( self )
+
+ def write_BODY( self, request ):
+        request.write( "<p align='center'><table cellspacing='3' cellpadding='2' border='0'><tr>" )
+
+        for ( command, text ) in self.actions.items():
+            request.write( "<td style='border: 1px solid black; background-color: grey' onmouseover='buttonMouseOver( this )' onmouseout='buttonMouseOut( this )'>" )
+            request.write( "<p><a href='javascript: doOp( \"%s\" );'>%s</a></p></td>" % (command, text) )
+
+ request.write("</table></p>")
+
+class CompositeTab( HTMLBase ):
+
+ def __init__( self, tabs ):
+ HTMLBase.__init__( self )
+ self.tabs = tabs
+
+ def write_BODY( self, request ):
+ for tab in self.tabs:
+ request.write( "<br/>" )
+ tab().write_BODY( request )
+
+ def perform( self, request ):
+ for tab in self.tabs:
+ tab().perform( request )
+
+
+
+
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/HTMLBase.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/HTMLBase.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,62 @@
+from xen.sv.util import *
+
+class HTMLBase:
+
+ isLeaf = True
+
+ def __init__( self ):
+ pass
+
+ def render_POST( self, request ):
+ self.perform( request )
+ return self.render_GET( request )
+
+ def render_GET( self, request ):
+ self.write_TOP( request )
+ self.write_BODY( request )
+ self.write_BOTTOM( request )
+ return ''
+
+ def write_BODY( self, request ):
+ request.write( "BODY" )
+
+ def write_TOP( self, request ):
+        request.write( '<html><head><title>Xen</title><link rel="stylesheet" type="text/css" href="inc/style.css" />' )
+ request.write( '<script src="inc/script.js"></script>' )
+ request.write( '</head><body>' )
+ request.write('<form method="post" action="%s">' % request.uri)
+
+ def write_BOTTOM( self, request ):
+ request.write('<input type="hidden" name="op" value="">')
+ request.write('<input type="hidden" name="args" value="">')
+ request.write('</form>')
+ request.write( "</body></html>" )
+
+ def get_op_method(self, op):
+ """Get the method for an operation.
+ For operation 'foo' looks for 'op_foo'.
+
+ op operation name
+ returns method or None
+ """
+ op_method_name = 'op_' + op
+ return getattr(self, op_method_name, None)
+
+ def perform(self, req):
+ """General operation handler for posted operations.
+ For operation 'foo' looks for a method op_foo and calls
+ it with op_foo(req). Replies with code 500 if op_foo
+ is not found.
+
+ The method must return a list when req.use_sxp is true
+ and an HTML string otherwise (or list).
+ Methods may also return a Deferred (for incomplete processing).
+
+ req request
+ """
+ op = req.args.get('op')
+ if not op is None and len(op) == 1:
+ op = op[0]
+ op_method = self.get_op_method(op)
+ if op_method:
+ op_method( req )
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/Main.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/Main.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,113 @@
+from xen.sv.HTMLBase import HTMLBase
+from xen.sv.DomList import DomList
+from xen.sv.NodeInfo import NodeInfo
+from xen.sv.DomInfo import DomInfo
+from xen.sv.CreateDomain import CreateDomain
+from xen.sv.MigrateDomain import MigrateDomain
+from xen.sv.SaveDomain import SaveDomain
+from xen.sv.RestoreDomain import RestoreDomain
+
+from xen.xend.XendClient import server
+
+from xen.sv.util import getVar
+
+class Main( HTMLBase ):
+
+ isLeaf = True
+
+ def __init__( self, urlWriter = None ):
+ self.modules = { "node": NodeInfo,
+ "list": DomList,
+ "info": DomInfo,
+ "create": CreateDomain,
+ "migrate" : MigrateDomain,
+ "save" : SaveDomain,
+ "restore" : RestoreDomain }
+
+ # ordered list of module menus to display
+ self.module_menus = [ "node", "create", "migrate", "save",
+ "restore", "list" ]
+ HTMLBase.__init__(self)
+
+ def render_POST( self, request ):
+
+ #decide what module post'd the action
+
+ args = getVar( 'args', request )
+
+ mod = getVar( 'mod', request )
+
+ if not mod is None and args is None:
+ module = self.modules[ mod ]
+ #check module exists
+ if module:
+ module( self.mainUrlWriter ).perform( request )
+ else:
+ self.perform( request )
+
+ return self.render_GET( request )
+
+ def mainUrlWriter( self, module ):
+ def fun( f ):
+ return "Main.rpy?mod=%s%s" % ( module, f )
+ return fun
+
+ def write_BODY( self, request ):
+
+        request.write( "\n<table style='border:0px solid black; background: url(images/orb_01.jpg) no-repeat' cellspacing='0' cellpadding='0' border='0' width='780px' height='536px'>\n" )
+ request.write( "<tr>\n" )
+ request.write( " <td width='15px'> </td>" )
+ request.write( " <td width='175px' align='center' valign'center'>" )
+        request.write( " <table cellspacing='0' cellpadding='0' border='0' width='100%' height='100%'>" )
+        request.write( " <tr><td height='140px' align='center' valign='bottom'><a href='http://www.cl.cam.ac.uk/Research/SRG/netos/xen/'>" )
+        request.write( " <img src='images/xen.png' width='150' height='75' border='0'/></a><br/></td></tr>" )
+        request.write( " <tr><td height='60px' align='center'><p class='small'>SV Web Interface<br/>(C) <a href='mailto:tw275@xxxxxxxxx'>Tom Wilkie</a> 2004</p></td></tr>")
+ request.write( " <tr><td align='center' valign='top'>" )
+
+ for modName in self.module_menus:
+            self.modules[modName]( self.mainUrlWriter( modName ) ).write_MENU( request )
+
+ request.write( " </td></tr>" )
+ request.write( " </table>" )
+ request.write( " " )
+ request.write( " </td>\n" )
+ request.write( " <td width='15px'> </td>" )
+ request.write( " <td width='558px' align='left' valign='top'>" )
+        request.write( " <table cellspacing='0' cellpadding='0' border='0' width='100%' height='100%'>" )
+ request.write( " <tr><td height='20px'></td></tr>" )
+ request.write( " <tr><td align='center' valign='top'>" )
+
+ modName = getVar('mod', request)
+
+ if modName is None:
+ request.write( '<p>Please select a module</p>' )
+ else:
+ module = self.modules[ modName ]
+ if module:
+ module( self.mainUrlWriter( modName ) ).write_BODY( request )
+ else:
+ request.write( '<p>Invalid module. Please select another</p>' )
+
+ request.write( " </td></tr>" )
+ request.write( " </table>" )
+ request.write( " </td>\n" )
+ request.write( " <td width='17px'> </td>" )
+ request.write( "</tr>\n" )
+
+ request.write( "</table>\n" )
+
+
+ def op_destroy( self, request ):
+ dom = getVar( 'dom', request )
+ if not dom is None and dom != "0":
+ server.xend_domain_destroy( int( dom ), "halt" )
+
+ def op_pause( self, request ):
+ dom = getVar( 'dom', request )
+ if not dom is None and dom != "0":
+ server.xend_domain_pause( int( dom ) )
+
+ def op_unpause( self, request ):
+ dom = getVar( 'dom', request )
+ if not dom is None and dom != "0":
+ server.xend_domain_unpause( int( dom ) )
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/MigrateDomain.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/MigrateDomain.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,74 @@
+from xen.sv.Wizard import *
+from xen.sv.util import *
+from xen.sv.GenTabbed import PreTab
+
+from xen.xm.create import make_config, OptVals
+
+from xen.xend.XendClient import server
+
+class MigrateDomain( Wizard ):
+ def __init__( self, urlWriter ):
+
+ sheets = [ ChooseMigrateDomain,
+ DoMigrate ]
+
+ Wizard.__init__( self, urlWriter, "Migrate Domain", sheets )
+
+
+class ChooseMigrateDomain( Sheet ):
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "Configure Migration", 0)
+ domains = []
+ domnames = []
+
+ try:
+ domains = server.xend_domains()
+ domains.sort()
+ except:
+ pass
+
+ for i in domains:
+ if i != 'Domain-0': domnames.append((i,i))
+
+ self.addControl( ListControl('domid',
+ domnames,
+ 'Domain ID:') )
+ self.addControl( TickControl('live',
+ 'True',
+ 'Live migrate:') )
+ self.addControl( InputControl('rate',
+ '0',
+ 'Rate limit:') )
+ self.addControl( InputControl( 'dest', 'myhost.mydomain',
+ 'Name or IP address:',
+ ".*") )
+
+class DoMigrate( Sheet ):
+ def __init__(self, urlWriter ):
+ Sheet.__init__(self, urlWriter, "Migration Done", 1)
+
+ def write_BODY( self, request, err ):
+
+ if not self.passback: self.parseForm( request )
+
+# print string2sxp(self.passback)
+
+ config = ssxp2hash ( string2sxp( self.passback ) )
+
+ try:
+ print config
+ print config['domid'], config['dest']
+ dom_sxp = server.xend_domain_migrate( config['domid'],
+ config['dest'],
+ config.get('live') == 'True',
+ config['rate'] )
+ success = "Your domain was successfully Migrated.\n"
+ except Exception, e:
+ success = "There was an error migrating your domain\n"
+ dom_sxp = str(e)
+
+ pt = PreTab( success + dom_sxp ) # sxp2prettystring( dom_sxp ) )
+ pt.write_BODY( request )
+
+        request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback )
+        request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location )
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/NodeInfo.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/NodeInfo.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,63 @@
+from xen.xend.XendClient import server
+
+from xen.sv.util import *
+from xen.sv.GenTabbed import *
+
+class NodeInfo( GenTabbed ):
+
+ def __init__( self, urlWriter ):
+        GenTabbed.__init__( self, "Node Details", urlWriter, [ 'General', 'Dmesg', ], [ NodeGeneralTab, NodeDmesgTab ] )
+
+ def write_MENU( self, request ):
+        request.write( "<p class='small'><a href='%s'>Node details</a></p>" % self.urlWriter( '' ) )
+
+class NodeGeneralTab( CompositeTab ):
+ def __init__( self ):
+ CompositeTab.__init__( self, [ NodeInfoTab, NodeActionTab ] )
+
+class NodeInfoTab( GeneralTab ):
+
+ def __init__( self ):
+
+ nodeInfo = {}
+ try:
+ nodeInfo = sxp2hash( server.xend_node() )
+ except:
+ nodeInfo[ 'system' ] = 'Error getting node info'
+
+ dictTitles = {}
+ dictTitles[ 'System' ] = 'system'
+ dictTitles[ 'Hostname' ] = 'host'
+ dictTitles[ 'Release' ] = 'release'
+ dictTitles[ 'Version' ] ='version'
+ dictTitles[ 'Machine' ] = 'machine'
+ dictTitles[ 'Cores' ] = 'cores'
+        dictTitles[ 'Hyperthreading' ] = ( 'hyperthreads_per_core', hyperthreadFormatter )
+ dictTitles[ 'CPU Speed' ] = ( 'cpu_mhz', cpuFormatter )
+ dictTitles[ 'Memory' ] = ( 'memory', memoryFormatter )
+ dictTitles[ 'Free Memory' ] = ( 'free_memory', memoryFormatter )
+
+ GeneralTab.__init__( self, dict=nodeInfo, titles=dictTitles )
+
+class NodeDmesgTab( PreTab ):
+
+ def __init__( self ):
+ try:
+ dmesg = server.xend_node_get_dmesg()
+ except:
+ dmesg = "Error getting node information: XenD not running?"
+ PreTab.__init__( self, dmesg )
+
+class NodeActionTab( ActionTab ):
+
+ def __init__( self ):
+ ActionTab.__init__( self, { "shutdown" : "shutdown",
+ "reboot" : "reboot" } )
+
+ def op_shutdown( self, request ):
+ if debug: print ">NodeShutDown"
+ server.xend_node_shutdown()
+
+ def op_reboot( self, request ):
+ if debug: print ">NodeReboot"
+ server.xend_node_reboot()
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/RestoreDomain.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/RestoreDomain.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,46 @@
+from xen.sv.Wizard import *
+from xen.sv.util import *
+from xen.sv.GenTabbed import PreTab
+
+from xen.xm.create import make_config, OptVals
+
+from xen.xend.XendClient import server
+
+class RestoreDomain( Wizard ):
+ def __init__( self, urlWriter ):
+
+ sheets = [ ChooseRestoreDomain,
+ DoRestore ]
+
+ Wizard.__init__( self, urlWriter, "Restore Domain", sheets )
+
+
+class ChooseRestoreDomain( Sheet ):
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "Configure Restore", 0)
+
+ self.addControl( InputControl( 'file', '',
+ 'Suspend file name:',
+ ".*") )
+
+class DoRestore( Sheet ):
+ def __init__(self, urlWriter ):
+ Sheet.__init__(self, urlWriter, "Restore Done", 1)
+
+ def write_BODY( self, request, err ):
+
+ if not self.passback: self.parseForm( request )
+ config = ssxp2hash ( string2sxp( self.passback ) )
+
+ try:
+ dom_sxp = server.xend_domain_restore( config['file'] )
+ success = "Your domain was successfully restored.\n"
+ except Exception, e:
+ success = "There was an error restoring your domain\n"
+ dom_sxp = str(e)
+
+ pt = PreTab( success + sxp2prettystring( dom_sxp ) )
+ pt.write_BODY( request )
+
+        request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback )
+        request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location )
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/SaveDomain.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/SaveDomain.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,62 @@
+from xen.sv.Wizard import *
+from xen.sv.util import *
+from xen.sv.GenTabbed import PreTab
+
+from xen.xm.create import make_config, OptVals
+
+from xen.xend.XendClient import server
+
+class SaveDomain( Wizard ):
+ def __init__( self, urlWriter ):
+
+ sheets = [ ChooseSaveDomain,
+ DoSave ]
+
+ Wizard.__init__( self, urlWriter, "Save Domain", sheets )
+
+
+class ChooseSaveDomain( Sheet ):
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "Configure Save", 0)
+
+ domains = []
+ domnames = []
+
+ try:
+ domains = server.xend_domains()
+ domains.sort()
+ except:
+ pass
+
+ for i in domains:
+ if i != 'Domain-0': domnames.append((i,i))
+
+ self.addControl( ListControl('domid',
+ domnames,
+ 'Domain ID:') )
+ self.addControl( InputControl( 'file', '',
+ 'Suspend file name:',
+ ".*") )
+
+class DoSave( Sheet ):
+ def __init__(self, urlWriter ):
+ Sheet.__init__(self, urlWriter, "Save Done", 1)
+
+ def write_BODY( self, request, err ):
+
+ if not self.passback: self.parseForm( request )
+ config = ssxp2hash ( string2sxp( self.passback ) )
+
+ try:
+ dom_sxp = server.xend_domain_save( config['domid'],
+ config['file'] )
+ success = "Your domain was successfully saved.\n"
+ except Exception, e:
+ success = "There was an error saving your domain\n"
+ dom_sxp = str(e)
+
+ pt = PreTab( success + dom_sxp ) # sxp2prettystring( dom_sxp ) )
+ pt.write_BODY( request )
+
+        request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback )
+        request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location )
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/TabView.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/TabView.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,26 @@
+from xen.sv.HTMLBase import HTMLBase
+
+class TabView( HTMLBase ):
+
+ # tab - int, id into tabs of selected tab
+ # tabs - list of strings, tab names
+ # urlWriter -
+ def __init__( self, tab, tabs, urlWriter ):
+ HTMLBase.__init__(self)
+ self.tab = tab
+ self.tabs = tabs
+ self.urlWriter = urlWriter
+
+ def write_BODY( self, request ):
+        request.write( "<table style='' border='0' cellspacing='3' cellpadding='2' align='center'>" )
+ request.write( "<tr height='22'>" )
+
+ for i in range( len( self.tabs ) ):
+ if self.tab == i:
+ backgroundColor = "white"
+ else:
+ backgroundColor = "grey"
+
+            request.write( "<td style='border:1px solid black; background-color: %s'><p align='center'><a href='%s'>%s</a></p></td>" % ( backgroundColor, self.urlWriter( "&tab=%s" % i ), self.tabs[ i ] ) )
+
+ request.write( "</tr></table>" )
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/Wizard.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/Wizard.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,269 @@
+from xen.sv.util import *
+from xen.sv.HTMLBase import HTMLBase
+from xen.xend import sxp
+
+import re
+
+DEBUG = 0
+
+class Wizard( HTMLBase ):
+
+ def __init__( self, urlWriter, title, sheets ):
+ HTMLBase.__init__( self )
+ self.title = title
+ self.sheets = sheets
+ self.urlWriter = urlWriter
+
+ def write_MENU( self, request ):
+        request.write( "<p class='small'><a href='%s'>%s</a></p>" % (self.urlWriter( '' ), self.title) )
+
+ def write_BODY( self, request ):
+
+        request.write( "<table width='100%' border='0' cellspacing='0' cellpadding='0'><tr><td>" )
+        request.write( "<p align='center'><u>%s</u></p></td></tr><tr><td>" % self.title )
+
+ currSheet = getVar( 'sheet', request )
+
+ if not currSheet is None:
+ currSheet = int( currSheet )
+ else:
+ currSheet = 0
+
+ sheet = self.sheets[ currSheet ]( self.urlWriter )
+
+ err = not sheet.validate( request )
+
+ if not err:
+ op = getVar( 'op', request )
+
+ if op == 'next':
+ currSheet += 1
+ elif op == 'prev':
+ currSheet -= 1
+
+ sheet = self.sheets[ currSheet ]( self.urlWriter )
+
+ if getVar( 'visited-sheet%s' % currSheet, request ):
+ sheet.write_BODY( request, err )
+ else:
+ sheet.write_BODY( request, False )
+
+
+        request.write( "</td></tr><tr><td><table width='100%' border='0' cellspacing='0' cellpadding='0'><tr>" )
+        request.write( "<td width='80%'></td><td width='20%' align='center'><p align='center'>" )
+        if currSheet > 0:
+            request.write( "<img src='images/previous.png' onclick='doOp( \"prev\" )' onmouseover='update( \"wizText\", \"Previous\" )' onmouseout='update( \"wizText\", \" \" )'> " )
+        if currSheet < ( len( self.sheets ) - 2 ):
+            request.write( "<img src='images/next.png' onclick='doOp( \"next\" )' onmouseover='update( \"wizText\", \"Next\" )' onmouseout='update( \"wizText\", \" \" )'>" )
+        elif currSheet == ( len( self.sheets ) - 2 ):
+            request.write( "<img src='images/finish.png' onclick='doOp( \"next\" )' onmouseover='update( \"wizText\", \"Finish\" )' onmouseout='update( \"wizText\", \" \" )'>" )
+        request.write( "</p><p align='center'><span id='wizText'></span></p></td></tr></table>" )
+ request.write( "</td></tr></table>" )
+
+ def op_next( self, request ):
+ pass
+
+ def op_prev( self, request ):
+ pass
+
+ def op_finish( self, request ):
+ pass
+
+class Sheet( HTMLBase ):
+
+ def __init__( self, urlWriter, title, location ):
+ HTMLBase.__init__( self )
+ self.urlWriter = urlWriter
+ self.feilds = []
+ self.title = title
+ self.location = location
+ self.passback = None
+
+ def parseForm( self, request ):
+ do_not_parse = [ 'mod', 'op', 'sheet', 'passback' ]
+
+ passed_back = request.args
+
+ temp_passback = passed_back.get( "passback" )
+
+ if temp_passback is not None and len( temp_passback ) > 0:
+ temp_passback = temp_passback[ len( temp_passback )-1 ]
+ else:
+ temp_passback = "( )"
+
+        last_passback = ssxp2hash( string2sxp( temp_passback ) ) #use special function - will work with no head on sxp
+
+ if DEBUG: print last_passback
+
+ for (key, value) in passed_back.items():
+ if key not in do_not_parse:
+ last_passback[ key ] = value[ len( value ) - 1 ]
+
+ self.passback = sxp2string( hash2sxp( last_passback ) ) #store the sxp
+
+ if DEBUG: print self.passback
+
+ def write_BODY( self, request, err ):
+
+ if not self.passback: self.parseForm( request )
+
+ request.write( "<p>%s</p>" % self.title )
+
+ previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the hash for quick reference
+
+ request.write( "<table width='100%' cellpadding='0' cellspacing='1' border='0'>" )
+
+ for (feild, control) in self.feilds:
+ control.write_Control( request, previous_values.get( feild ) )
+ if err and not control.validate( previous_values.get( feild ) ):
+ control.write_Help( request )
+
+ request.write( "</table>" )
+
+ request.write( "<input type='hidden' name='passback'
value=\"%s\"></p>" % self.passback )
+ request.write( "<input type='hidden' name='sheet' value='%s'></p>" %
self.location )
+ request.write( "<input type='hidden' name='visited-sheet%s'
value='True'></p>" % self.location )
+
+ def addControl( self, control ):
+ self.feilds.append( [ control.getName(), control ] )
+
+ def validate( self, request ):
+
+ if not self.passback: self.parseForm( request )
+
+ check = True
+
+ previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the hash for quick reference
+ if DEBUG: print previous_values
+
+ for (feild, control) in self.feilds:
+ if not control.validate( previous_values.get( feild ) ):
+ check = False
+ if DEBUG: print "> %s = %s" % (feild, previous_values.get(
feild ))
+
+ return check
+
+class SheetControl( HTMLBase ):
+
+ def __init__( self, reg_exp = ".*" ):
+ HTMLBase.__init__( self )
+ self.name = ""
+ self.reg_exp = reg_exp
+
+ def write_Control( self, request, persistedValue ):
+ request.write( "<tr colspan='2'><td>%s</td></tr>" % persistedValue )
+
+ def write_Help( self, request ):
+ request.write( "<tr><td align='right' colspan='2'><p
class='small'>Text must match pattern:" )
+ request.write( " %s</p></td></tr>" % self.reg_exp )
+
+ def validate( self, persistedValue ):
+ if persistedValue is None:
+ persistedValue = ""
+
+ return re.compile( self.reg_exp ).match( persistedValue ) is not None
+
+ def getName( self ):
+ return self.name
+
+ def setName( self, name ):
+ self.name = name
+
+class InputControl( SheetControl ):
+
+ def __init__( self, name, defaultValue, humanText, reg_exp = ".*", help_text = "You must enter the appropriate details in this field." ):
+ SheetControl.__init__( self, reg_exp )
+ self.setName( name )
+
+ self.defaultValue = defaultValue
+ self.humanText = humanText
+ self.help_text = help_text
+
+ def write_Control( self, request, persistedValue ):
+ if persistedValue is None:
+ persistedValue = self.defaultValue
+
+ request.write( "<tr><td width='50%%'><p>%s</p></td><td
width='50%%'><input size='40'type='text' name='%s' value=\"%s\"></td></tr>" %
(self.humanText, self.getName(), persistedValue) )
+
+ def write_Help( self, request ):
+ request.write( "<tr><td align='right' colspan='2'><p class='small'>" )
+ request.write( " %s</p></td></tr>" % self.help_text )
+
+class TextControl( SheetControl ):
+
+ def __init__( self, text ):
+ SheetControl.__init__( self )
+ self.text = text
+
+ def write_Control( self, request, persistedValue ):
+ request.write( "<tr><td colspan='2'><p>%s</p></td></tr>" % self.text )
+
+class SmallTextControl( SheetControl ):
+
+ def __init__( self, text ):
+ SheetControl.__init__( self )
+ self.text = text
+
+ def write_Control( self, request, persistedValue ):
+ request.write( "<tr><td colspan='2'><p class='small'>%s</p></tr></td>"
% self.text )
+
+class ListControl( SheetControl ):
+
+ def __init__( self, name, options, humanText ):
+ SheetControl.__init__( self )
+ self.setName( name )
+ self.options = options
+ self.humanText = humanText
+
+ def write_Control( self, request, persistedValue ):
+ request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'>"
% self.humanText )
+ request.write( "<select name='%s'>" % self.getName() )
+ for (value, text) in self.options:
+ if value == persistedValue:
+ request.write( "<option value='%s' selected>%s\n" % (value,
text) )
+ else:
+ request.write( "<option value='%s'>%s\n" % (value, text) )
+ request.write( "</select></td></tr>" )
+
+ def validate( self, persistedValue ):
+ for (value, text) in self.options:
+ if value == persistedValue:
+ return True
+
+ return False
+
+class FileControl( InputControl ):
+
+ def __init__( self, name, defaultValue, humanText, reg_exp = ".*", help_text = "You must enter the appropriate details in this field." ):
+ InputControl.__init__( self, name, defaultValue, humanText )
+
+ def validate( self, persistedValue ):
+ if persistedValue is None: return False
+ try:
+ open( persistedValue )
+ return True
+ except (IOError, TypeError):
+ return False
+
+ def write_Help( self, request ):
+ request.write( "<tr><td colspan='2' align='right'><p
class='small'>File does not exist: you must enter a valid, absolute file
path.</p></td></tr>" )
+
+class TickControl( SheetControl ):
+
+ def __init__( self, name, defaultValue, humanText ):
+ SheetControl.__init__( self )
+ self.setName( name )
+ self.defaultValue = defaultValue
+ self.humanText = humanText
+
+ def write_Control( self, request, persistedValue ):
+ request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'>"
% self.humanText )
+
+ if persistedValue == 'True':
+ request.write( "<input type='checkbox' name='%s' value='True'
checked>" % self.getName() )
+ else:
+ request.write( "<input type='checkbox' name='%s' value='True'>" %
self.getName() )
+
+ request.write( "</select></td></tr>" )
+
+
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/__init__.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/__init__.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,1 @@
+
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/params.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/params.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,3 @@
+SV_PORT = 8080
+SV_ROOT = "/var/lib/xen/sv/"
+PID_FILE = "/var/run/xen-sv.pid"
diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/util.py
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/python/xen/sv/util.py Fri Jul 29 20:25:03 2005
@@ -0,0 +1,126 @@
+from xen.xend.XendClient import server
+from xen.xend import sxp
+from xen.xend import PrettyPrint
+
+import types
+
+def getDomInfoHash( domain ):
+ domInfoHash = {}
+ try:
+ domInfoHash = sxp2hash( server.xend_domain( domain ) )
+ domInfoHash['dom'] = domain
+ except:
+ domInfoHash['name'] = "Error getting domain details"
+ return domInfoHash
+
+def sxp2hash( s ):
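+# Convert an s-expression into a dictionary, recursing into nested lists.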
+ sxphash = {}
+
+ for child in sxp.children( s ):
+ if isinstance( child, types.ListType ) and len( child ) > 1:
+ if isinstance( child[1], types.ListType ) and len( child ) > 1:
+ sxphash[ child[0] ] = sxp2hash( child[1] )
+ else:
+ sxphash[ child[0] ] = child[1]
+
+ return sxphash
+
+def ssxp2hash( s ):
+ sxphash = {}
+
+ for i in s:
+ if isinstance( i, types.ListType ) and len( i ) > 1:
+ sxphash[ i[0] ] = i[1]
+
+ return sxphash
+
+def hash2sxp( h ):
+ hashsxp = []
+
+ for (key, item) in h.items():
+ hashsxp.append( [key, item] )
+
+ return hashsxp
+
+def string2sxp( string ):
+ pin = sxp.Parser()
+ pin.input( string )
+ return pin.get_val()
+
+def sxp2string( sexp ):
+ return sxp.to_string( sexp )
+
+def sxp2prettystring( sxp ):
+ class tmp:
+ def __init__( self ):
+ self.str = ""
+ def write( self, str ):
+ self.str = self.str + str
+ temp = tmp()
+ PrettyPrint.prettyprint( sxp, out=temp )
+ return temp.str
+
+def getVar( var, request, default=None ):
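+# Return the last value supplied for a request argument, or 'default' if
+# the argument is absent.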
+
+ arg = request.args.get( var )
+
+ if arg is None:
+ return default
+ else:
+ return arg[ len( arg )-1 ]
+
+def bigTimeFormatter( time ):
+ time = float( time )
+ weeks = time // 604800
+ remainder = time % 604800
+ days = remainder // 86400
+
+ remainder = remainder % 86400
+
+ hms = smallTimeFormatter( remainder )
+
+ return "%d weeks, %d days, %s" % ( weeks, days, hms )
+
+def smallTimeFormatter( time ):
+ time = float( time )
+ hours = time // 3600
+ remainder = time % 3600
+ mins = remainder // 60
+ secs = time % 60
+ return "%02d:%02d:%04.1f (hh:mm:ss.s)" % ( hours, mins, secs )
+
+def stateFormatter( state ):
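+# Expand xend's run-state flag string into a readable list of state names.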
+ states = [ 'Running', 'Blocked', 'Paused', 'Shutdown', 'Crashed' ]
+
+ stateStr = ""
+
+ for i in range( len( state ) ):
+ if state[i] != "-":
+ stateStr += "%s, " % states[ i ]
+
+ return stateStr + " (%s)" % state
+
+def memoryFormatter( mem ):
+ mem = int( mem )
+ if mem >= 1024:
+ mem = float( mem ) / 1024
+ return "%3.2fGb" % mem
+ else:
+ return "%7dMb" % mem
+
+def cpuFormatter( mhz ):
+ mhz = int( mhz )
+ if mhz > 1000:
+ ghz = float( mhz ) / 1000.0
+ return "%4.2fGHz" % ghz
+ else:
+ return "%4dMHz" % mhz
+
+def hyperthreadFormatter( threads ):
+ try:
+ if int( threads ) > 1:
+ return "Yes"
+ else:
+ return "No"
+ except:
+ return "No"
diff -r a4196568095c -r b53a65034532 tools/sv/Makefile
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/sv/Makefile Fri Jul 29 20:25:03 2005
@@ -0,0 +1,2 @@
+
+all:
diff -r a4196568095c -r b53a65034532 tools/sv/images/destroy.png
--- /dev/null Fri Jul 29 18:52:33 2005
+++ b/tools/sv/images/destroy.png Fri Jul 29 20:25:03 2005
@@ -0,0 +1,23 @@
+[binary PNG image data for tools/sv/images/destroy.png -- not representable in a plain-text diff]