# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID 8799d14bef771ae236166e3c5c00a65dd6f2d44c
# Parent 5f1ed597f1072b86d5c59a588c3ac2aefd0b7450
# Parent 9fb0bad776dd3d1c1cd5eec4990a296fbe0e38dc
Merge with -unstable
diff -r 5f1ed597f107 -r 8799d14bef77 .hgignore
--- a/.hgignore Wed Aug 24 02:43:18 2005
+++ b/.hgignore Thu Aug 25 22:53:20 2005
@@ -69,25 +69,25 @@
^tools/blktap/blkdump$
^tools/blktap/blkgnbd$
^tools/blktap/blkimg$
-^tools/blktap/blockstore\.dat$
-^tools/blktap/blockstored$
^tools/blktap/bstest$
-^tools/blktap/parallax$
^tools/blktap/vdi\.dot$
^tools/blktap/vdi\.ps$
-^tools/blktap/vdi_create$
-^tools/blktap/vdi_fill$
-^tools/blktap/vdi_list$
-^tools/blktap/vdi_snap$
-^tools/blktap/vdi_snap_list$
-^tools/blktap/vdi_tree$
-^tools/blktap/vdi_validate$
+^tools/blktap/parallax/vdi_create$
+^tools/blktap/parallax/vdi_fill$
+^tools/blktap/parallax/vdi_list$
+^tools/blktap/parallax/vdi_snap$
+^tools/blktap/parallax/vdi_snap_list$
+^tools/blktap/parallax/vdi_snap_delete$
+^tools/blktap/parallax/vdi_tree$
+^tools/blktap/parallax/vdi_validate$
+^tools/blktap/parallax/parallax$
+^tools/blktap/parallax/blockstored$
^tools/blktap/xen/.*$
^tools/check/\..*$
^tools/cmdline/.*$
^tools/cmdline/xen/.*$
-^tools/consoled/consoled$
-^tools/consoled/xc_console$
+^tools/console/xenconsoled$
+^tools/console/xenconsole$
^tools/debugger/pdb/pdb$
^tools/debugger/pdb/linux-[0-9.]*-module/.*\.ko$
^tools/debugger/pdb/linux-[0-9.]*-module/.*\.mod.c$
@@ -116,7 +116,6 @@
^tools/ioemu/target-.*/Makefile$
^tools/ioemu/target-.*/config\..*$
^tools/ioemu/target-.*/qemu-dm$
-^tools/ioemu/target-.*/qemu-vgaram-bin$
^tools/libxc/xen/.*$
^tools/misc/cpuperf/cpuperf-perfcntr$
^tools/misc/cpuperf/cpuperf-xen$
@@ -148,6 +147,7 @@
^tools/xcs/xcsdump$
^tools/xcutils/xc_restore$
^tools/xcutils/xc_save$
+^tools/xenstat/xentop/xentop$
^tools/xenstore/testsuite/tmp/.*$
^tools/xenstore/xen$
^tools/xenstore/xenstored$
diff -r 5f1ed597f107 -r 8799d14bef77 Config.mk
--- a/Config.mk Wed Aug 24 02:43:18 2005
+++ b/Config.mk Thu Aug 25 22:53:20 2005
@@ -7,13 +7,14 @@
# Tools to run on system hosting the build
HOSTCC = gcc
-HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+HOSTCFLAGS = -Wall -Werror -Wstrict-prototypes -O2 -fomit-frame-pointer
AS = $(CROSS_COMPILE)as
LD = $(CROSS_COMPILE)ld
CC = $(CROSS_COMPILE)gcc
CPP = $(CROSS_COMPILE)gcc -E
AR = $(CROSS_COMPILE)ar
+RANLIB = $(CROSS_COMPILE)ranlib
NM = $(CROSS_COMPILE)nm
STRIP = $(CROSS_COMPILE)strip
OBJCOPY = $(CROSS_COMPILE)objcopy
@@ -35,3 +36,15 @@
# Choose the best mirror to download linux kernel
KERNEL_REPO = http://www.kernel.org
+
+# ACM_USE_SECURITY_POLICY selects the security policy that Xen is built with.
+# Supported models are:
+# ACM_NULL_POLICY (ACM will not be built with this policy)
+# ACM_CHINESE_WALL_POLICY
+# ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY
+# ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
+ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY
+
+# Optional components
+XENSTAT_XENTOP ?= y
+
diff -r 5f1ed597f107 -r 8799d14bef77 Makefile
--- a/Makefile Wed Aug 24 02:43:18 2005
+++ b/Makefile Thu Aug 25 22:53:20 2005
@@ -101,11 +101,6 @@
for i in $(ALLKERNELS) ; do $(MAKE) $$i-delete ; done
for i in $(ALLSPARSETREES) ; do $(MAKE) $$i-mrproper ; done
-install-twisted:
- wget http://www.twistedmatrix.com/products/get-current.epy
- tar -zxf Twisted-*.tar.gz
- cd Twisted-* && python setup.py install
-
install-logging: LOGGING=logging-0.4.9.2
install-logging:
[ -f $(LOGGING).tar.gz ] || wget
http://www.red-dove.com/$(LOGGING).tar.gz
@@ -149,7 +144,6 @@
@echo ' kclean - clean guest kernel build trees'
@echo ''
@echo 'Dependency installation targets:'
- @echo ' install-twisted - install the Twisted Matrix Framework'
@echo ' install-logging - install the Python Logging package'
@echo ' install-iptables - install iptables tools'
@echo ''
@@ -178,6 +172,12 @@
rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/miniterm
rm -rf $(D)/boot/*xen*
rm -rf $(D)/lib/modules/*xen*
+ rm -rf $(D)/usr/bin/cpuperf-perfcntr $(D)/usr/bin/cpuperf-xen
+ rm -rf $(D)/usr/bin/xc_shadow
+ rm -rf $(D)/usr/share/xen $(D)/usr/libexec/xen
+ rm -rf $(D)/usr/share/man/man1/xen*
+ rm -rf $(D)/usr/share/man/man8/xen*
+ rm -rf $(D)/usr/lib/xen
# Legacy targets for compatibility
linux24:
diff -r 5f1ed597f107 -r 8799d14bef77 buildconfigs/Rules.mk
--- a/buildconfigs/Rules.mk Wed Aug 24 02:43:18 2005
+++ b/buildconfigs/Rules.mk Thu Aug 25 22:53:20 2005
@@ -66,6 +66,7 @@
PATCHDIRS := $(wildcard patches/*-*)
+ifneq ($(PATCHDIRS),)
-include $(patsubst %,%/.makedep,$(PATCHDIRS))
$(patsubst patches/%,patches/%/.makedep,$(PATCHDIRS)): patches/%/.makedep:
@@ -80,6 +81,7 @@
([ -d patches/$* ] && \
for i in patches/$*/*.patch ; do ( cd $(@D) ; patch -p1 <../$$i ||
exit 1 ) ; done) || true
touch $@ # update timestamp to avoid rebuild
+endif
%-build:
$(MAKE) -f buildconfigs/mk.$* build
@@ -115,7 +117,7 @@
ifeq ($(XEN_TARGET_X86_PAE),y)
sed -e 's!^CONFIG_HIGHMEM4G=y$$!\# CONFIG_HIGHMEM4G is not set!;s!^\#
CONFIG_HIGHMEM64G is not set$$!CONFIG_HIGHMEM64G=y!' $(CONFIG_FILE) >
$(CONFIG_FILE)- && mv $(CONFIG_FILE)- $(CONFIG_FILE)
else
- @: # do nothing yet
+ grep '^CONFIG_HIGHMEM64G=y' $(CONFIG_FILE) >/dev/null && ( sed -e
's!^CONFIG_HIGHMEM64G=y$$!\# CONFIG_HIGHMEM64G is not set!;s!^\#
CONFIG_HIGHMEM4G is not set$$!CONFIG_HIGHMEM4G=y!' $(CONFIG_FILE) >
$(CONFIG_FILE)- && mv $(CONFIG_FILE)- $(CONFIG_FILE) ) || true
endif
# never delete any intermediate files.
diff -r 5f1ed597f107 -r 8799d14bef77 docs/src/user.tex
--- a/docs/src/user.tex Wed Aug 24 02:43:18 2005
+++ b/docs/src/user.tex Thu Aug 25 22:53:20 2005
@@ -1709,8 +1709,11 @@
For example: `com1=9600, 8n1, 0x408, 5' maps COM1 to a
9600-baud port, 8 data bits, no parity, 1 stop bit,
I/O port base 0x408, IRQ 5.
- If the I/O base and IRQ are standard (com1:0x3f8,4;
- com2:0x2f8,3) then they need not be specified.
+ If some configuration options are standard (e.g., I/O base and IRQ),
+ then only a prefix of the full configuration string need be
+ specified. If the baud rate is pre-configured (e.g., by the
+ bootloader) then you can specify `auto' in place of a numeric baud
+ rate.
\item [console=$<$specifier list$>$ ]
Specify the destination for Xen console I/O.
@@ -1760,7 +1763,7 @@
physical address in the memory map will be ignored. This parameter
may be specified with a B, K, M or G suffix, representing bytes,
kilobytes, megabytes and gigabytes respectively. The
- default unit, if no suffix is specified, is bytes.
+ default unit, if no suffix is specified, is kilobytes.
\item [dom0\_mem=xxx ]
Set the amount of memory to be allocated to domain0. In Xen 3.x the parameter
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/include/hypervisor.h
--- a/extras/mini-os/include/hypervisor.h Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/include/hypervisor.h Thu Aug 25 22:53:20 2005
@@ -80,16 +80,42 @@
static __inline__ int HYPERVISOR_mmu_update(mmu_update_t *req,
int count,
- int *success_count)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_mmu_update),
- _a1 (req), _a2 (count), _a3 (success_count) : "memory" );
-
- return ret;
-}
+ int *success_count,
+ domid_t domid)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
+ "3" (success_count), "4" (domid)
+ : "memory" );
+
+ return ret;
+}
+
+
+/*
+ * Issue the __HYPERVISOR_mmuext_op hypercall through TRAP_INSTR.
+ *
+ * The hypercall number is loaded via the "a" constraint and the four
+ * arguments (op, count, success_count, domid) via the "b", "c", "d" and
+ * "S" constraints. Returns whatever the hypercall leaves in the "a"
+ * register.
+ */
+static __inline__ int HYPERVISOR_mmuext_op(struct mmuext_op *op,
+ int count,
+ int *success_count,
+ domid_t domid)
+{
+ int ret;
+ /* Dummy outputs: tie each argument register to an output so the compiler
+ treats those registers as modified by the trap. */
+ unsigned long ign1, ign2, ign3, ign4;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
+ "3" (success_count), "4" (domid)
+ : "memory" );
+
+ return ret;
+}
+
+
static __inline__ int HYPERVISOR_set_gdt(unsigned long *frame_list, int
entries)
{
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/include/mm.h Thu Aug 25 22:53:20 2005
@@ -43,13 +43,27 @@
#define PADDR_MASK ((1UL << PADDR_BITS)-1)
#define VADDR_MASK ((1UL << VADDR_BITS)-1)
-#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
+#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >>
L1_PAGETABLE_SHIFT)
+
+#endif
+
+
+
+#ifdef __i386__
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+
+#define L1_PAGETABLE_ENTRIES 1024
+#define L2_PAGETABLE_ENTRIES 1024
+#endif
/* Given a virtual address, get an entry offset into a page table. */
#define l1_table_offset(_a) \
(((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
#define l2_table_offset(_a) \
(((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#ifdef __x86_64__
#define l3_table_offset(_a) \
(((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
#define l4_table_offset(_a) \
@@ -67,13 +81,16 @@
#define _PAGE_PSE 0x080UL
#define _PAGE_GLOBAL 0x100UL
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#define L1_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY |
_PAGE_USER)
+
+#define PAGE_SIZE (1UL << L1_PAGETABLE_SHIFT)
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
#define PAGE_MASK (~(PAGE_SIZE-1))
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT)
+#define PFN_DOWN(x) ((x) >> L1_PAGETABLE_SHIFT)
+#define PFN_PHYS(x) ((x) << L1_PAGETABLE_SHIFT)
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
@@ -83,14 +100,14 @@
#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
static __inline__ unsigned long phys_to_machine(unsigned long phys)
{
- unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
- machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+ unsigned long machine = pfn_to_mfn(phys >> L1_PAGETABLE_SHIFT);
+ machine = (machine << L1_PAGETABLE_SHIFT) | (phys & ~PAGE_MASK);
return machine;
}
static __inline__ unsigned long machine_to_phys(unsigned long machine)
{
- unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
- phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+ unsigned long phys = mfn_to_pfn(machine >> L1_PAGETABLE_SHIFT);
+ phys = (phys << L1_PAGETABLE_SHIFT) | (machine & ~PAGE_MASK);
return phys;
}
@@ -105,7 +122,10 @@
#define __va to_virt
#define __pa to_phys
+#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt)))
+
void init_mm(void);
unsigned long alloc_pages(int order);
+int is_mfn_mapped(unsigned long mfn);
#endif /* _MM_H_ */
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/include/time.h
--- a/extras/mini-os/include/time.h Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/include/time.h Thu Aug 25 22:53:20 2005
@@ -28,7 +28,7 @@
* of real time into system time
*/
typedef s64 s_time_t;
-#define NOW() ((s_time_t)get_s_time())
+#define NOW() ((s_time_t)monotonic_clock())
#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL )
#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL )
#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL )
@@ -36,7 +36,8 @@
#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL )
#define Time_Max ((s_time_t) 0x7fffffffffffffffLL)
#define FOREVER Time_Max
-
+/* Convert a nanosecond count to whole microseconds / whole seconds.
+ * The argument is parenthesised so that an expression such as (a + b)
+ * is divided as a whole rather than only its last term. */
+#define NSEC_TO_USEC(_nsec) ((_nsec) / 1000UL)
+#define NSEC_TO_SEC(_nsec) ((_nsec) / 1000000000ULL)
/* wall clock time */
typedef long time_t;
@@ -44,6 +45,11 @@
struct timeval {
time_t tv_sec; /* seconds */
suseconds_t tv_usec; /* microseconds */
+};
+
+struct timespec {
+ time_t ts_sec;
+ long ts_nsec;
};
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/kernel.c Thu Aug 25 22:53:20 2005
@@ -132,22 +132,8 @@
i = 0;
for ( ; ; )
{
- if(i >= 1000)
- {
- {
- unsigned long saved;
- __asm__ ("movl %%esp, %0"
- :"=r"(saved) /* y is output operand */
- /* x is input operand */);
-// :"a"); /* %eax is clobbered register */
- printk("ESP=0x%lx\n", saved);
- }
-
- printk("1000 bloks\n");
- i=0;
- }
// HYPERVISOR_yield();
- block(1);
+ block(100);
i++;
}
}
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/mm.c
--- a/extras/mini-os/mm.c Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/mm.c Thu Aug 25 22:53:20 2005
@@ -5,9 +5,9 @@
*
* File: mm.c
* Author: Rolf Neugebauer (neugebar@xxxxxxxxxxxxx)
- * Changes:
+ * Changes: Grzegorz Milos
*
- * Date: Aug 2003
+ * Date: Aug 2003, changes Aug 2005
*
* Environment: Xen Minimal OS
* Description: memory management related functions
@@ -41,86 +41,18 @@
#include <types.h>
#include <lib.h>
+
+/*
+ * DEBUG(fmt, args...): when MM_DEBUG is defined, print a printk-style
+ * message prefixed with "MINI_OS(file=mm.c, line=N) " and followed by a
+ * newline; otherwise expand to a no-op expression.
+ */
+#ifdef MM_DEBUG
+#define DEBUG(_f, _a...) \
+ printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## _a)
+#else
+#define DEBUG(_f, _a...) ((void)0)
+#endif
+
unsigned long *phys_to_machine_mapping;
extern char *stack;
extern char _text, _etext, _edata, _end;
-static void init_page_allocator(unsigned long min, unsigned long max);
-
-void init_mm(void)
-{
-
- unsigned long start_pfn, max_pfn, max_free_pfn;
-
- unsigned long *pgd = (unsigned long *)start_info.pt_base;
-
- printk("MM: Init\n");
-
- printk(" _text: %p\n", &_text);
- printk(" _etext: %p\n", &_etext);
- printk(" _edata: %p\n", &_edata);
- printk(" stack start: %p\n", &stack);
- printk(" _end: %p\n", &_end);
-
- /* set up minimal memory infos */
- start_pfn = PFN_UP(to_phys(&_end));
- max_pfn = start_info.nr_pages;
-
- printk(" start_pfn: %lx\n", start_pfn);
- printk(" max_pfn: %lx\n", max_pfn);
-
- /*
- * we know where free tables start (start_pfn) and how many we
- * have (max_pfn).
- *
- * Currently the hypervisor stores page tables it providesin the
- * high region of the this memory range.
- *
- * next we work out how far down this goes (max_free_pfn)
- *
- * XXX this assumes the hypervisor provided page tables to be in
- * the upper region of our initial memory. I don't know if this
- * is always true.
- */
-
- max_free_pfn = PFN_DOWN(to_phys(pgd));
-#ifdef __i386__
- {
- unsigned long *pgd = (unsigned long *)start_info.pt_base;
- unsigned long pte;
- int i;
- printk(" pgd(pa(pgd)): %lx(%lx)", (u_long)pgd, to_phys(pgd));
-
- for ( i = 0; i < (HYPERVISOR_VIRT_START>>22); i++ )
- {
- unsigned long pgde = *pgd++;
- if ( !(pgde & 1) ) continue;
- pte = machine_to_phys(pgde & PAGE_MASK);
- printk(" PT(%x): %lx(%lx)", i, (u_long)to_virt(pte), pte);
- if (PFN_DOWN(pte) <= max_free_pfn)
- max_free_pfn = PFN_DOWN(pte);
- }
- }
- max_free_pfn--;
- printk(" max_free_pfn: %lx\n", max_free_pfn);
-
- /*
- * now we can initialise the page allocator
- */
- printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
- (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn),
- (u_long)to_virt(PFN_PHYS(max_free_pfn)), PFN_PHYS(max_free_pfn));
- init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_free_pfn));
-#endif
-
-
- /* Now initialise the physical->machine mapping table. */
-
-
- printk("MM: done\n");
-
-
-}
/*********************
* ALLOCATION BITMAP
@@ -213,6 +145,59 @@
#define round_pgdown(_p) ((_p)&PAGE_MASK)
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+#ifdef MM_DEBUG
+/*
+ * Dump the allocation state of @nr_pages consecutive pages, beginning
+ * with the page that contains the virtual address @start. One character
+ * is printed per page: '1' when allocated_in_map() reports the page as
+ * allocated, '0' otherwise. The line is terminated with a newline.
+ */
+static void print_allocation(void *start, int nr_pages)
+{
+    unsigned long first_pfn = virt_to_pfn(start);
+    int page = 0;
+
+    while(page < nr_pages)
+    {
+        if(allocated_in_map(first_pfn + page))
+        {
+            printk("1");
+        }
+        else
+        {
+            printk("0");
+        }
+        page++;
+    }
+
+    printk("\n");
+}
+
+/*
+ * Prints the free chunks (marking each one with its own letter) for
+ * @nr_pages pages starting at @start (virtual). Pages that belong to no
+ * free chunk are shown as '_'.
+ *
+ * At most 1000 pages can be shown; a larger @nr_pages is reported through
+ * DEBUG() and clamped to 1000, and a negative @nr_pages is treated as 0,
+ * so the terminating NUL always lands inside the local buffer.
+ */
+static void print_chunks(void *start, int nr_pages)
+{
+    char chunks[1001], current='A';
+    int order, count;
+    chunk_head_t *head;
+    unsigned long pfn_start = virt_to_pfn(start);
+
+    memset(chunks, (int)'_', 1000);
+    if(nr_pages > 1000)
+    {
+        DEBUG("Can only pring 1000 pages. Increase buffer size.");
+        /* Clamp: chunks[] only has room for 1000 pages plus the NUL. */
+        nr_pages = 1000;
+    }
+    else if(nr_pages < 0)
+    {
+        /* A negative count would index before the start of chunks[]. */
+        nr_pages = 0;
+    }
+
+    for(order=0; order < FREELIST_SIZE; order++)
+    {
+        head = free_head[order];
+        while(!FREELIST_EMPTY(head))
+        {
+            for(count = 0; count < 1<< head->level; count++)
+            {
+                /* Unsigned arithmetic: pages below pfn_start wrap to a huge
+                 * value and are rejected by the same bound check. */
+                if(count + virt_to_pfn(head) - pfn_start < 1000)
+                    chunks[count + virt_to_pfn(head) - pfn_start] = current;
+            }
+            head = head->next;
+            current++;
+        }
+    }
+    chunks[nr_pages] = '\0';
+    printk("%s\n", chunks);
+}
+#endif
+
/*
@@ -328,3 +313,198 @@
return 0;
}
+/*
+ * Give the 2^@order contiguous pages that start at the virtual address
+ * @pointer back to the page allocator.
+ *
+ * The pages are marked free in the allocation bitmap, the chunk is merged
+ * with its neighbouring free chunk of the same order for as long as that
+ * is possible, and the resulting chunk is pushed onto the front of
+ * free_head[] for its final order.
+ */
+void free_pages(void *pointer, int order)
+{
+    chunk_head_t *freed_ch, *to_merge_ch;
+    chunk_tail_t *freed_ct;
+    unsigned long mask;
+
+    /* First free the chunk */
+    map_free(virt_to_pfn(pointer), 1 << order);
+
+    /* Create free chunk: the tail record occupies the last chunk_tail_t
+     * sized slot inside the chunk. */
+    freed_ch = (chunk_head_t *)pointer;
+    freed_ct = (chunk_tail_t *)((char *)pointer + (1UL<<(order + PAGE_SHIFT)))-1;
+
+    /* Now, possibly we can coalesce chunks together. Stop one short of
+     * FREELIST_SIZE so that the order reached by merging is still a valid
+     * free_head[] index (the free lists are walked for orders
+     * 0..FREELIST_SIZE-1 elsewhere in this file). */
+    while(order < FREELIST_SIZE - 1)
+    {
+        /* Size in bytes of a chunk of the current order; done in unsigned
+         * long so large orders do not overflow an int shift. */
+        mask = 1UL << (order + PAGE_SHIFT);
+        if((unsigned long)freed_ch & mask)
+        {
+            to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
+            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
+                    to_merge_ch->level != order)
+                break;
+
+            /* Merge with predecessor: the tail stays where it is */
+            freed_ch = to_merge_ch;
+        }
+        else
+        {
+            to_merge_ch = (chunk_head_t *)((char *)freed_ch + mask);
+            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
+                    to_merge_ch->level != order)
+                break;
+
+            /* Merge with successor: the tail is the last slot *inside* the
+             * successor, hence the - 1 (without it the tail record would be
+             * written just past the end of the merged chunk). */
+            freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask) - 1;
+        }
+
+        /* We are committed to merging, unlink the chunk */
+        *(to_merge_ch->pprev) = to_merge_ch->next;
+        to_merge_ch->next->pprev = to_merge_ch->pprev;
+
+        order++;
+    }
+
+    /* Link the new chunk */
+    freed_ch->level = order;
+    freed_ch->next = free_head[order];
+    freed_ch->pprev = &free_head[order];
+    freed_ct->level = order;
+
+    freed_ch->next->pprev = &freed_ch->next;
+    free_head[order] = freed_ch;
+
+}
+/*
+ * Extend the page tables so that the pfns not yet mapped are mapped, up to
+ * and including *max_pfn.
+ *
+ * @start_pfn: in: first frame that may be consumed as a new page-table page;
+ * out: first frame that was not consumed.
+ * @max_pfn: in: highest pfn to map; lowered if the mapping would otherwise
+ * reach HYPERVISOR_VIRT_START.
+ *
+ * For every new page-table page the frame is first remapped without
+ * _PAGE_RW, pinned as an L1 table, and then hooked into the page directory
+ * and filled in a single mmu_update batch. Any failure prints a message
+ * and deliberately faults by writing through a null pointer.
+ */
+void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
+{
+ unsigned long pfn_to_map, pt_frame;
+ unsigned long mach_ptd, max_mach_ptd;
+ int count;
+ unsigned long mach_pte, virt_pte;
+ unsigned long *ptd = (unsigned long *)start_info.pt_base;
+ /* One slot for the page-directory entry plus one per L1 entry. */
+ mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
+ struct mmuext_op pin_request;
+
+ /* Firstly work out what is the first pfn that is not yet in page tables
+ NB. Assuming that builder fills whole pt_frames (which it does at the
+ moment)
+ */
+ pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
+ DEBUG("start_pfn=%ld, first pfn_to_map %ld, max_pfn=%ld",
+ *start_pfn, pfn_to_map, *max_pfn);
+
+ /* Machine address of page table directory */
+ mach_ptd = phys_to_machine(to_phys(start_info.pt_base));
+ mach_ptd += sizeof(void *) *
+ l2_table_offset((unsigned long)to_virt(PFN_PHYS(pfn_to_map)));
+
+ /* Byte offset of the last directory entry needed; the directory's
+ machine base address is added further down. */
+ max_mach_ptd = sizeof(void *) *
+ l2_table_offset((unsigned long)to_virt(PFN_PHYS(*max_pfn)));
+
+ /* Check that we are not trying to access Xen region */
+ if(max_mach_ptd > sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START))
+ {
+ printk("WARNING: mini-os will not use all the memory supplied\n");
+ max_mach_ptd = sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START);
+ *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
+ }
+ max_mach_ptd += phys_to_machine(to_phys(start_info.pt_base));
+ DEBUG("Max_mach_ptd 0x%lx", max_mach_ptd);
+
+ pt_frame = *start_pfn;
+ /* Should not happen - no empty, mapped pages */
+ if(pt_frame >= pfn_to_map)
+ {
+ printk("ERROR: Not even a single empty, mapped page\n");
+ *(int*)0=0;
+ }
+
+ /* One iteration per page-directory slot that still needs a page table. */
+ while(mach_ptd < max_mach_ptd)
+ {
+ /* Correct protection needs to be set for the new page table frame */
+ virt_pte = (unsigned long)to_virt(PFN_PHYS(pt_frame));
+ mach_pte = ptd[l2_table_offset(virt_pte)] & ~(PAGE_SIZE-1);
+ mach_pte += sizeof(void *) * l1_table_offset(virt_pte);
+ DEBUG("New page table page: pfn=0x%lx, mfn=0x%lx, virt_pte=0x%lx, "
+ "mach_pte=0x%lx", pt_frame, pfn_to_mfn(pt_frame),
+ virt_pte, mach_pte);
+
+ /* Update the entry */
+ mmu_updates[0].ptr = mach_pte;
+ mmu_updates[0].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
+ (L1_PROT & ~_PAGE_RW);
+ if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
+ {
+ printk("PTE for new page table page could not be updated\n");
+ *(int*)0=0;
+ }
+
+ /* Pin the page to provide correct protection */
+ pin_request.cmd = MMUEXT_PIN_L1_TABLE;
+ pin_request.mfn = pfn_to_mfn(pt_frame);
+ if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
+ {
+ printk("ERROR: pinning failed\n");
+ *(int*)0=0;
+ }
+
+ /* Now fill the new page table page with entries.
+ Update the page directory as well. */
+ count = 0;
+ mmu_updates[count].ptr = mach_ptd;
+ mmu_updates[count].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
+ L2_PROT;
+ count++;
+ mach_ptd += sizeof(void *);
+ mach_pte = phys_to_machine(PFN_PHYS(pt_frame++));
+
+ /* Slots 1..L1_PAGETABLE_ENTRIES hold the PTEs of the new table. */
+ for(;count <= L1_PAGETABLE_ENTRIES && pfn_to_map <= *max_pfn; count++)
+ {
+ mmu_updates[count].ptr = mach_pte;
+ mmu_updates[count].val =
+ pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
+ if(count == 1) DEBUG("mach_pte 0x%lx", mach_pte);
+ mach_pte += sizeof(void *);
+ }
+ if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0)
+ {
+ printk("ERROR: mmu_update failed\n");
+ *(int*)0=0;
+ }
+ /* NOTE(review): this increment is overwritten by the assignment after
+ the loop, so it appears to be redundant - confirm. */
+ (*start_pfn)++;
+ }
+
+ *start_pfn = pt_frame;
+}
+
+/*
+ * Bring up memory management: print the image layout symbols, record the
+ * physical-to-machine table supplied in start_info, let build_pagetable()
+ * map the remaining memory and (on i386) hand the resulting free range to
+ * the page allocator.
+ */
+void init_mm(void)
+{
+    unsigned long first_free_pfn;
+    unsigned long last_pfn;
+
+    printk("MM: Init\n");
+
+    printk(" _text: %p\n", &_text);
+    printk(" _etext: %p\n", &_etext);
+    printk(" _edata: %p\n", &_edata);
+    printk(" stack start: %p\n", &stack);
+    printk(" _end: %p\n", &_end);
+
+    /* set up minimal memory infos */
+    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
+
+    /* The first usable page comes after the page table pages and 3 further
+     * pages (store page etc). */
+    first_free_pfn = PFN_UP(__pa(start_info.pt_base));
+    first_free_pfn += start_info.nr_pt_frames + 3;
+    last_pfn = start_info.nr_pages;
+
+    printk(" start_pfn: %lx\n", first_free_pfn);
+    printk(" max_pfn: %lx\n", last_pfn);
+
+    /* May advance first_free_pfn and lower last_pfn. */
+    build_pagetable(&first_free_pfn, &last_pfn);
+
+#ifdef __i386__
+    {
+        /* The page allocator takes physical addresses, not frame numbers. */
+        unsigned long range_lo = PFN_PHYS(first_free_pfn);
+        unsigned long range_hi = PFN_PHYS(last_pfn);
+
+        printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
+               (u_long)to_virt(range_lo), range_lo,
+               (u_long)to_virt(range_hi), range_hi);
+        init_page_allocator(range_lo, range_hi);
+    }
+#endif
+
+    printk("MM: done\n");
+}
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/time.c
--- a/extras/mini-os/time.c Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/time.c Thu Aug 25 22:53:20 2005
@@ -43,19 +43,20 @@
* Time functions
*************************************************************************/
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-
/* These are peridically updated in shared_info, and then copied here. */
-static u32 shadow_tsc_stamp;
-static s64 shadow_system_time;
-static u32 shadow_time_version;
-static struct timeval shadow_tv;
+/* Local snapshot of the time values published in the shared-info page;
+ * filled in by get_time_values_from_xen(). */
+struct shadow_time_info {
+ u64 tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_timestamp; /* Time, in nanosecs, since boot. */
+ u32 tsc_to_nsec_mul; /* Copy of tsc_to_system_mul; multiplier for scale_delta(). */
+ u32 tsc_to_usec_mul; /* Derived: tsc_to_nsec_mul / 1000. */
+ int tsc_shift; /* Shift applied to a TSC delta before multiplying. */
+ u32 version; /* Version of the values this snapshot was taken from. */
+};
+static struct timespec shadow_ts;
+static u32 shadow_ts_version;
+
+static struct shadow_time_info shadow;
+
#ifndef rmb
#define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
@@ -63,116 +64,150 @@
#define HANDLE_USEC_OVERFLOW(_tv) \
do { \
- while ( (_tv).tv_usec >= 1000000 ) \
+ while ( (_tv)->tv_usec >= 1000000 ) \
{ \
- (_tv).tv_usec -= 1000000; \
- (_tv).tv_sec++; \
+ (_tv)->tv_usec -= 1000000; \
+ (_tv)->tv_sec++; \
} \
} while ( 0 )
+/*
+ * Non-zero when the version recorded in the local shadow copy still equals
+ * the version currently published in vcpu_time[0] of the shared-info page.
+ */
+static inline int time_values_up_to_date(void)
+{
+    return (HYPERVISOR_shared_info->vcpu_time[0].version == shadow.version);
+}
+
+
+/*
+ * Scale a 64-bit delta by shifting it and then multiplying by a 32-bit
+ * binary fraction, yielding a 64-bit result.
+ *
+ * @delta:    value to scale.
+ * @mul_frac: multiplier, interpreted as a fraction of 2^32.
+ * @shift:    shift applied to @delta first; a negative value shifts right
+ *            by -@shift, any other value shifts left by @shift.
+ *
+ * Returns (shifted delta * mul_frac) >> 32, truncated to 64 bits.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+    u64 product;
+#ifdef __i386__
+    u32 tmp1, tmp2;
+#endif
+
+    if ( shift < 0 )
+        delta >>= -shift;
+    else
+        delta <<= shift;
+
+#ifdef __i386__
+    /*
+     * Two 32x32->64 multiplies. %4 starts as the high half of delta and %5
+     * as mul_frac. The xor must come BEFORE the add: xor clears the carry
+     * flag, so placing it between add and adc would drop the carry out of
+     * the low word of the result.
+     */
+    __asm__ (
+        "mul %5 ; "        /* edx:eax = low half of delta * mul_frac */
+        "mov %4,%%eax ; "  /* eax = high half of delta */
+        "mov %%edx,%4 ; "  /* keep the upper word of the first product */
+        "mul %5 ; "        /* edx:eax = high half of delta * mul_frac */
+        "xor %5,%5 ; "     /* %5 = 0 (mul_frac is no longer needed) */
+        "add %4,%%eax ; "  /* add the first product's upper word */
+        "adc %5,%%edx ; "  /* propagate the carry into the top word */
+        : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+        : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#else
+    /* 64x64->128 multiply, then take bits 32..95 of the product. */
+    __asm__ (
+        "mul %%rdx ; shrd $32,%%rdx,%%rax"
+        : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#endif
+
+    return product;
+}
+
+
+/*
+ * Nanoseconds elapsed since the TSC timestamp held in the shadow copy.
+ *
+ * Reads the TSC, subtracts shadow.tsc_timestamp and scales the difference
+ * with the shadow multiplier and shift. The result is returned as a full
+ * 64-bit value: scale_delta() yields a u64, and narrowing it to a 32-bit
+ * unsigned long would wrap for offsets above roughly 4.29 seconds.
+ */
+static u64 get_nsec_offset(void)
+{
+    u64 now, delta;
+    rdtscll(now);
+    delta = now - shadow.tsc_timestamp;
+    return scale_delta(delta, shadow.tsc_to_nsec_mul, shadow.tsc_shift);
+}
+
+
static void get_time_values_from_xen(void)
{
- do {
- shadow_time_version = HYPERVISOR_shared_info->time_version2;
- rmb();
- shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec;
- shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec;
- shadow_tsc_stamp = (u32)HYPERVISOR_shared_info->tsc_timestamp;
- shadow_system_time = HYPERVISOR_shared_info->system_time;
- rmb();
- }
- while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 );
-}
-
-
-#define TIME_VALUES_UP_TO_DATE \
- (shadow_time_version == HYPERVISOR_shared_info->time_version2)
-
-static u32 get_time_delta_usecs(void)
-{
- register unsigned long eax, edx;
-
- /* Read the Time Stamp Counter */
-
- rdtsc(eax,edx);
-
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= shadow_tsc_stamp;
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
-
- /* our adjusted time offset in microseconds */
- return edx;
-}
-
-s64 get_s_time (void)
-{
- u64 u_delta;
- s64 ret;
-
- again:
-
- u_delta = get_time_delta_usecs();
- ret = shadow_system_time + (1000 * u_delta);
-
- if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
- {
- /*
- * We may have blocked for a long time, rendering our calculations
- * invalid (e.g. the time delta may have overflowed). Detect that
- * and recalculate with fresh values.
- */
- get_time_values_from_xen();
- goto again;
- }
-
- return ret;
-}
+ struct vcpu_time_info *src = &HYPERVISOR_shared_info->vcpu_time[0];
+
+ do {
+ shadow.version = src->version;
+ rmb();
+ shadow.tsc_timestamp = src->tsc_timestamp;
+ shadow.system_timestamp = src->system_time;
+ shadow.tsc_to_nsec_mul = src->tsc_to_system_mul;
+ shadow.tsc_shift = src->tsc_shift;
+ rmb();
+ }
+ while ((src->version & 1) | (shadow.version ^ src->version));
+
+ shadow.tsc_to_usec_mul = shadow.tsc_to_nsec_mul / 1000;
+}
+
+
+
+
+/* monotonic_clock(): returns the shadowed system timestamp (nanoseconds)
+ * plus the nanoseconds elapsed since that timestamp was taken.
+ * Note: This function is required to return accurate
+ * time even in the absence of multiple timer ticks.
+ *
+ * The computation is repeated whenever the shadow copy was refreshed while
+ * the time was being calculated, so the result never mixes values from two
+ * different snapshots.
+ */
+u64 monotonic_clock(void)
+{
+ u64 time;
+ u32 local_time_version;
+
+ do {
+ /* Remember which snapshot this attempt is based on. */
+ local_time_version = shadow.version;
+ rmb();
+ time = shadow.system_timestamp + get_nsec_offset();
+ /* Refresh the shadow copy if the shared-info version has moved on;
+ this changes shadow.version and forces another pass. */
+ if (!time_values_up_to_date())
+ get_time_values_from_xen();
+ rmb();
+ } while (local_time_version != shadow.version);
+
+ return time;
+}
+
+/*
+ * Copy the wall-clock seconds and nanoseconds from the shared-info page
+ * into shadow_ts, recording the version they belong to in
+ * shadow_ts_version.
+ *
+ * The copy is retried while wc_version is odd or differs from the version
+ * read before the copy (presumably an odd version marks an update in
+ * progress - confirm against the Xen interface definition).
+ */
+static void update_wallclock(void)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+
+ do {
+ shadow_ts_version = s->wc_version;
+ rmb();
+ shadow_ts.ts_sec = s->wc_sec;
+ shadow_ts.ts_nsec = s->wc_nsec;
+ rmb();
+ }
+ while ((s->wc_version & 1) | (shadow_ts_version ^ s->wc_version));
+}
+
void gettimeofday(struct timeval *tv)
{
- struct timeval _tv;
-
- do {
- get_time_values_from_xen();
- _tv.tv_usec = get_time_delta_usecs();
- _tv.tv_sec = shadow_tv.tv_sec;
- _tv.tv_usec += shadow_tv.tv_usec;
- }
- while ( unlikely(!TIME_VALUES_UP_TO_DATE) );
-
- HANDLE_USEC_OVERFLOW(_tv);
- *tv = _tv;
-}
+ u64 nsec = monotonic_clock();
+ nsec += shadow_ts.ts_nsec;
+
+
+ tv->tv_sec = shadow_ts.ts_sec;
+ tv->tv_sec += NSEC_TO_SEC(nsec);
+ tv->tv_usec = NSEC_TO_USEC(nsec % 1000000000UL);
+}
+
static void print_current_time(void)
{
- struct timeval tv;
-
- get_time_values_from_xen();
+ struct timeval tv;
gettimeofday(&tv);
printk("T(s=%ld us=%ld)\n", tv.tv_sec, tv.tv_usec);
}
+
void block(u32 millisecs)
{
struct timeval tv;
gettimeofday(&tv);
- //printk("tv.tv_sec=%ld, tv.tv_usec=%ld, shadow_system_time=%lld\n",
tv.tv_sec, tv.tv_usec, shadow_system_time );
- HYPERVISOR_set_timer_op(get_s_time() + 1000000LL * (s64) millisecs);
+ HYPERVISOR_set_timer_op(monotonic_clock() + 1000000LL * (s64) millisecs);
HYPERVISOR_block();
}
@@ -185,7 +220,7 @@
static int i;
get_time_values_from_xen();
-
+ update_wallclock();
i++;
if (i >= 1000) {
print_current_time();
@@ -197,24 +232,5 @@
void init_time(void)
{
- u64 __cpu_khz;
- unsigned long cpu_khz;
-
- __cpu_khz = HYPERVISOR_shared_info->cpu_freq;
-
- cpu_khz = (u32) (__cpu_khz/1000);
-
- printk("Xen reported: %lu.%03lu MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
- (2^32 * 1 / (clocks/us)) */
- {
- unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (fast_gettimeoffset_quotient), "=d" (edx)
- :"r" (cpu_khz),
- "0" (eax), "1" (edx));
- }
-
bind_virq(VIRQ_TIMER, &timer_handler);
}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.4-xen-sparse/arch/xen/Makefile
--- a/linux-2.4-xen-sparse/arch/xen/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.4-xen-sparse/arch/xen/Makefile Thu Aug 25 22:53:20 2005
@@ -61,7 +61,6 @@
SUBDIRS += arch/xen/drivers/evtchn
SUBDIRS += arch/xen/drivers/blkif
SUBDIRS += arch/xen/drivers/netif
-#SUBDIRS += arch/xen/drivers/usbif
SUBDIRS += arch/xen/drivers/balloon
ifdef CONFIG_XEN_PRIVILEGED_GUEST
SUBDIRS += arch/xen/drivers/dom0
@@ -72,7 +71,6 @@
CORE_FILES += arch/xen/drivers/console/drv.o
DRIVERS += arch/xen/drivers/blkif/drv.o
DRIVERS += arch/xen/drivers/netif/drv.o
-DRIVERS += arch/xen/drivers/usbif/drv.o
ifdef CONFIG_XEN_PRIVILEGED_GUEST
CORE_FILES += arch/xen/drivers/dom0/drv.o
endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.4-xen-sparse/arch/xen/config.in
--- a/linux-2.4-xen-sparse/arch/xen/config.in Wed Aug 24 02:43:18 2005
+++ b/linux-2.4-xen-sparse/arch/xen/config.in Thu Aug 25 22:53:20 2005
@@ -16,14 +16,10 @@
comment 'Xen'
bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST
bool 'Device-driver domain (physical device access)' CONFIG_XEN_PHYSDEV_ACCESS
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
- bool 'USB-device backend driver' CONFIG_XEN_USB_BACKEND
-fi
bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES
bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND
bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND
bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT
-bool 'USB-device frontend driver' CONFIG_XEN_USB_FRONTEND
endmenu
# The IBM S/390 patch needs this.
define_bool CONFIG_NO_IDLE_HZ y
@@ -267,7 +263,7 @@
source drivers/char/Config.in
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ];
then
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
source drivers/media/Config.in
fi
@@ -302,14 +298,8 @@
endmenu
fi
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ];
then
- if [ "$CONFIG_XEN_USB_FRONTEND" = "y" -o "$CONFIG_XEN_USB_BACKEND" = "y" ];
then
- define_bool CONFIG_USB y
- fi
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
source drivers/usb/Config.in
-fi
-
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
source net/bluetooth/Config.in
fi
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.4-xen-sparse/mkbuildtree
--- a/linux-2.4-xen-sparse/mkbuildtree Wed Aug 24 02:43:18 2005
+++ b/linux-2.4-xen-sparse/mkbuildtree Thu Aug 25 22:53:20 2005
@@ -103,9 +103,8 @@
rm -f mkbuildtree
set ${RS}/../linux-2.6-xen-sparse
-[ "$1" == "${RS}/../linux-2.6-xen-parse" ] && { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; }
+[ "$1" == "${RS}/../linux-2.6-xen-sparse" ] && { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; }
LINUX_26="$1"
-
# Create links to the shared definitions of the Xen interfaces.
rm -rf ${AD}/include/asm-xen/xen-public
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig Thu Aug 25 22:53:20 2005
@@ -61,15 +61,6 @@
with the blktap. This option will be removed as the block drivers are
modified to use grant tables.
-config XEN_BLKDEV_GRANT
- bool "Grant table substrate for block drivers"
- depends on !XEN_BLKDEV_TAP_BE
- default y
- help
- This introduces the use of grant tables as a data exhange mechanism
- between the frontend and backend block drivers. This currently
- conflicts with the block tap.
-
config XEN_NETDEV_BACKEND
bool "Network-device backend driver"
depends on XEN_PHYSDEV_ACCESS
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.12-xen0
-# Mon Jul 25 09:48:34 2005
+# Wed Aug 3 09:54:56 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -14,12 +14,11 @@
CONFIG_XEN_PHYSDEV_ACCESS=y
CONFIG_XEN_BLKDEV_BACKEND=y
# CONFIG_XEN_BLKDEV_TAP_BE is not set
-CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_NETDEV_BACKEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
-#CONFIG_XEN_NETDEV_GRANT_TX=y
-#CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
@@ -93,11 +92,11 @@
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
+CONFIG_M686=y
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
-CONFIG_MPENTIUM4=y
+# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
@@ -112,15 +111,15 @@
# CONFIG_X86_GENERIC is not set
CONFIG_X86_CMPXCHG=y
CONFIG_X86_XADD=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=5
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_PPRO_FENCE=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_GOOD_APIC=y
-CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
# CONFIG_HPET_TIMER is not set
# CONFIG_HPET_EMULATE_RTC is not set
@@ -130,6 +129,7 @@
# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_MICROCODE=y
CONFIG_X86_CPUID=y
+CONFIG_SWIOTLB=y
#
# Firmware Drivers
@@ -540,7 +540,7 @@
# CONFIG_IP_NF_MATCH_STATE is not set
# CONFIG_IP_NF_MATCH_CONNTRACK is not set
# CONFIG_IP_NF_MATCH_OWNER is not set
-# CONFIG_IP_NF_MATCH_PHYSDEV is not set
+CONFIG_IP_NF_MATCH_PHYSDEV=y
# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
# CONFIG_IP_NF_MATCH_REALM is not set
# CONFIG_IP_NF_MATCH_SCTP is not set
@@ -688,7 +688,7 @@
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
-# CONFIG_SK98LIN is not set
+CONFIG_SK98LIN=y
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
# CONFIG_BNX2 is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-xen0
-# Wed Jun 29 10:01:20 2005
+# Linux kernel version: 2.6.12.4-xen0
+# Mon Aug 15 18:57:19 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -14,10 +14,11 @@
CONFIG_XEN_PHYSDEV_ACCESS=y
CONFIG_XEN_BLKDEV_BACKEND=y
# CONFIG_XEN_BLKDEV_TAP_BE is not set
-CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_NETDEV_BACKEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
@@ -50,6 +51,7 @@
# CONFIG_IKCONFIG is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_PRINTK=y
CONFIG_BUG=y
@@ -116,9 +118,11 @@
CONFIG_GENERIC_CPU=y
CONFIG_X86_L1_CACHE_BYTES=128
# CONFIG_X86_TSC is not set
+CONFIG_X86_XEN_GENAPIC=y
# CONFIG_X86_MSR is not set
# CONFIG_GART_IOMMU is not set
CONFIG_DUMMY_IOMMU=y
+CONFIG_SWIOTLB=y
# CONFIG_X86_MCE is not set
#
@@ -160,6 +164,7 @@
CONFIG_STANDALONE=y
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
#
# Memory Technology Devices (MTD)
@@ -369,7 +374,23 @@
#
# Multi-device support (RAID and LVM)
#
-# CONFIG_MD is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+# CONFIG_MD_RAID10 is not set
+# CONFIG_MD_RAID5 is not set
+# CONFIG_MD_RAID6 is not set
+CONFIG_MD_MULTIPATH=y
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_SNAPSHOT=y
+CONFIG_DM_MIRROR=y
+# CONFIG_DM_ZERO is not set
+CONFIG_DM_MULTIPATH=y
+CONFIG_DM_MULTIPATH_EMC=y
#
# Fusion MPT device support
@@ -458,7 +479,7 @@
# CONFIG_IP_NF_MATCH_STATE is not set
# CONFIG_IP_NF_MATCH_CONNTRACK is not set
# CONFIG_IP_NF_MATCH_OWNER is not set
-# CONFIG_IP_NF_MATCH_PHYSDEV is not set
+CONFIG_IP_NF_MATCH_PHYSDEV=y
# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
# CONFIG_IP_NF_MATCH_REALM is not set
# CONFIG_IP_NF_MATCH_SCTP is not set
@@ -589,7 +610,7 @@
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
-# CONFIG_SK98LIN is not set
+CONFIG_SK98LIN=y
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
# CONFIG_BNX2 is not set
@@ -786,7 +807,107 @@
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
-# CONFIG_USB is not set
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_EHCI_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+# CONFIG_USB_STORAGE is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+
+#
+# USB ATM/DSL drivers
+#
#
# USB Gadget Support
@@ -801,7 +922,12 @@
#
# InfiniBand support
#
-# CONFIG_INFINIBAND is not set
+CONFIG_INFINIBAND=y
+CONFIG_INFINIBAND_MTHCA=y
+CONFIG_INFINIBAND_MTHCA_DEBUG=y
+CONFIG_INFINIBAND_IPOIB=y
+CONFIG_INFINIBAND_IPOIB_DEBUG=y
+CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
#
# Power management options
@@ -1036,7 +1162,22 @@
# Kernel hacking
#
# CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=15
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
+# CONFIG_CHECKING is not set
+# CONFIG_INIT_DEBUG is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.12-xenU
-# Mon Jul 25 10:06:06 2005
+# Wed Aug 3 09:57:44 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -12,11 +12,10 @@
#
# CONFIG_XEN_PRIVILEGED_GUEST is not set
# CONFIG_XEN_PHYSDEV_ACCESS is not set
-CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
-#CONFIG_XEN_NETDEV_GRANT_TX=y
-#CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
@@ -90,11 +89,11 @@
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
+CONFIG_M686=y
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
-CONFIG_MPENTIUM4=y
+# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
@@ -109,15 +108,15 @@
# CONFIG_X86_GENERIC is not set
CONFIG_X86_CMPXCHG=y
CONFIG_X86_XADD=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=5
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_PPRO_FENCE=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_GOOD_APIC=y
-CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
# CONFIG_HPET_TIMER is not set
# CONFIG_HPET_EMULATE_RTC is not set
@@ -415,7 +414,7 @@
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
+CONFIG_CRAMFS=y
# CONFIG_VXFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.12-xenU
-# Thu Jul 7 11:43:14 2005
+# Thu Aug 18 11:15:14 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -12,9 +12,10 @@
#
# CONFIG_XEN_PRIVILEGED_GUEST is not set
# CONFIG_XEN_PHYSDEV_ACCESS is not set
-CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
@@ -28,7 +29,7 @@
#
CONFIG_EXPERIMENTAL=y
CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
#
@@ -46,8 +47,10 @@
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_PRINTK=y
CONFIG_BUG=y
@@ -72,6 +75,7 @@
CONFIG_MODVERSIONS=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
CONFIG_XENARCH="x86_64"
CONFIG_X86=y
CONFIG_MMU=y
@@ -84,12 +88,15 @@
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_GOOD_APIC=y
# CONFIG_HPET_TIMER is not set
-# CONFIG_SMP is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+# CONFIG_SCHED_SMT is not set
# CONFIG_PREEMPT is not set
# CONFIG_MICROCODE is not set
CONFIG_X86_CPUID=y
# CONFIG_NUMA is not set
# CONFIG_MTRR is not set
+CONFIG_HAVE_DEC_LOCK=y
# CONFIG_X86_LOCAL_APIC is not set
# CONFIG_X86_IO_APIC is not set
# CONFIG_PCI is not set
@@ -112,7 +119,11 @@
# CONFIG_GENERIC_CPU is not set
CONFIG_X86_L1_CACHE_BYTES=128
# CONFIG_X86_TSC is not set
+CONFIG_X86_XEN_GENAPIC=y
# CONFIG_X86_MSR is not set
+CONFIG_X86_HT=y
+# CONFIG_K8_NUMA is not set
+# CONFIG_NUMA_EMU is not set
CONFIG_DUMMY_IOMMU=y
# CONFIG_X86_MCE is not set
@@ -155,6 +166,7 @@
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
#
# Block devices
@@ -257,7 +269,10 @@
CONFIG_IP_ROUTE_MULTIPATH=y
# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
CONFIG_IP_ROUTE_VERBOSE=y
-# CONFIG_IP_PNP is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
CONFIG_NET_IPIP=m
CONFIG_NET_IPGRE=m
CONFIG_NET_IPGRE_BROADCAST=y
@@ -557,7 +572,6 @@
#
# Old SIR device drivers
#
-# CONFIG_IRPORT_SIR is not set
#
# Old Serial dongle support
@@ -660,14 +674,14 @@
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=m
+CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_XATTR=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
CONFIG_JBD=m
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
-CONFIG_REISERFS_FS=m
+CONFIG_REISERFS_FS=y
# CONFIG_REISERFS_CHECK is not set
CONFIG_REISERFS_PROC_INFO=y
CONFIG_REISERFS_FS_XATTR=y
@@ -746,7 +760,7 @@
# CONFIG_BEFS_DEBUG is not set
CONFIG_BFS_FS=m
CONFIG_EFS_FS=m
-CONFIG_CRAMFS=m
+CONFIG_CRAMFS=y
CONFIG_VXFS_FS=m
# CONFIG_HPFS_FS is not set
CONFIG_QNX4FS_FS=m
@@ -859,17 +873,7 @@
# Security options
#
# CONFIG_KEYS is not set
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_CAPABILITIES=y
-# CONFIG_SECURITY_SECLVL is not set
-CONFIG_SECURITY_SELINUX=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
-CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_SECURITY_SELINUX_DEVELOP=y
-CONFIG_SECURITY_SELINUX_AVC_STATS=y
-CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
+# CONFIG_SECURITY is not set
#
# Cryptographic options
@@ -917,5 +921,19 @@
# Kernel hacking
#
# CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=15
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
+# CONFIG_INIT_DEBUG is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.12-xen
-# Thu Jul 14 21:55:53 2005
+# Wed Aug 3 10:04:25 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -14,10 +14,11 @@
CONFIG_XEN_PHYSDEV_ACCESS=y
CONFIG_XEN_BLKDEV_BACKEND=y
# CONFIG_XEN_BLKDEV_TAP_BE is not set
-CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_NETDEV_BACKEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
@@ -135,6 +136,7 @@
# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_MICROCODE=m
CONFIG_X86_CPUID=m
+CONFIG_SWIOTLB=y
#
# Firmware Drivers
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-xen
-# Fri Jul 15 00:34:21 2005
+# Linux kernel version: 2.6.12.4-xen
+# Mon Aug 15 19:54:11 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -14,10 +14,11 @@
CONFIG_XEN_PHYSDEV_ACCESS=y
CONFIG_XEN_BLKDEV_BACKEND=y
# CONFIG_XEN_BLKDEV_TAP_BE is not set
-CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_NETDEV_BACKEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
@@ -33,6 +34,7 @@
# CONFIG_CLEAN_COMPILE is not set
CONFIG_BROKEN=y
CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
#
@@ -48,10 +50,11 @@
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_BASE_FULL=y
@@ -73,8 +76,9 @@
# CONFIG_MODULE_FORCE_UNLOAD is not set
CONFIG_OBSOLETE_MODPARM=y
# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
CONFIG_XENARCH="x86_64"
CONFIG_X86=y
CONFIG_MMU=y
@@ -87,12 +91,15 @@
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_GOOD_APIC=y
# CONFIG_HPET_TIMER is not set
-# CONFIG_SMP is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+# CONFIG_SCHED_SMT is not set
# CONFIG_PREEMPT is not set
CONFIG_MICROCODE=y
# CONFIG_X86_CPUID is not set
# CONFIG_NUMA is not set
# CONFIG_MTRR is not set
+CONFIG_HAVE_DEC_LOCK=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_PCI=y
@@ -108,7 +115,7 @@
#
CONFIG_X86_64=y
CONFIG_64BIT=y
-# CONFIG_EARLY_PRINTK is not set
+CONFIG_EARLY_PRINTK=y
#
# Processor type and features
@@ -117,9 +124,14 @@
CONFIG_GENERIC_CPU=y
CONFIG_X86_L1_CACHE_BYTES=128
# CONFIG_X86_TSC is not set
+CONFIG_X86_XEN_GENAPIC=y
# CONFIG_X86_MSR is not set
+CONFIG_X86_HT=y
+# CONFIG_K8_NUMA is not set
+# CONFIG_NUMA_EMU is not set
# CONFIG_GART_IOMMU is not set
CONFIG_DUMMY_IOMMU=y
+CONFIG_SWIOTLB=y
# CONFIG_X86_MCE is not set
#
@@ -149,7 +161,7 @@
# Executable file formats
#
CONFIG_BINFMT_ELF=y
-CONFIG_BINFMT_MISC=m
+CONFIG_BINFMT_MISC=y
#
# Device Drivers
@@ -160,7 +172,7 @@
#
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=m
+CONFIG_FW_LOADER=y
# CONFIG_DEBUG_DRIVER is not set
#
@@ -174,7 +186,7 @@
CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1
# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set
# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set
-# CONFIG_MTD_CMDLINE_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
#
# User Modules And Translation Layers
@@ -206,24 +218,20 @@
# CONFIG_MTD_CFI_I8 is not set
CONFIG_MTD_CFI_INTELEXT=m
CONFIG_MTD_CFI_AMDSTD=m
-CONFIG_MTD_CFI_AMDSTD_RETRY=0
+CONFIG_MTD_CFI_AMDSTD_RETRY=3
CONFIG_MTD_CFI_STAA=m
CONFIG_MTD_CFI_UTIL=m
CONFIG_MTD_RAM=m
CONFIG_MTD_ROM=m
CONFIG_MTD_ABSENT=m
# CONFIG_MTD_OBSOLETE_CHIPS is not set
-# CONFIG_MTD_XIP is not set
#
# Mapping drivers for chip access
#
CONFIG_MTD_COMPLEX_MAPPINGS=y
-CONFIG_MTD_PHYSMAP=m
-CONFIG_MTD_PHYSMAP_START=0x8000000
-CONFIG_MTD_PHYSMAP_LEN=0x4000000
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
-CONFIG_MTD_PNC2000=m
+# CONFIG_MTD_PHYSMAP is not set
+# CONFIG_MTD_PNC2000 is not set
CONFIG_MTD_SC520CDP=m
CONFIG_MTD_NETSC520=m
CONFIG_MTD_TS5500=m
@@ -231,10 +239,9 @@
CONFIG_MTD_ELAN_104NC=m
# CONFIG_MTD_AMD76XROM is not set
# CONFIG_MTD_ICHXROM is not set
-# CONFIG_MTD_SCB2_FLASH is not set
-CONFIG_MTD_NETtel=m
-CONFIG_MTD_DILNETPC=m
-CONFIG_MTD_DILNETPC_BOOTSIZE=0x80000
+CONFIG_MTD_SCB2_FLASH=m
+# CONFIG_MTD_NETtel is not set
+# CONFIG_MTD_DILNETPC is not set
# CONFIG_MTD_L440GX is not set
CONFIG_MTD_PCI=m
@@ -244,19 +251,19 @@
CONFIG_MTD_PMC551=m
# CONFIG_MTD_PMC551_BUGFIX is not set
# CONFIG_MTD_PMC551_DEBUG is not set
-CONFIG_MTD_SLRAM=m
-CONFIG_MTD_PHRAM=m
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
CONFIG_MTD_MTDRAM=m
CONFIG_MTDRAM_TOTAL_SIZE=4096
CONFIG_MTDRAM_ERASE_SIZE=128
-CONFIG_MTD_BLKMTD=m
-# CONFIG_MTD_BLOCK2MTD is not set
+# CONFIG_MTD_BLKMTD is not set
+CONFIG_MTD_BLOCK2MTD=m
#
# Disk-On-Chip Device Drivers
#
CONFIG_MTD_DOC2000=m
-CONFIG_MTD_DOC2001=m
+# CONFIG_MTD_DOC2001 is not set
CONFIG_MTD_DOC2001PLUS=m
CONFIG_MTD_DOCPROBE=m
CONFIG_MTD_DOCECC=m
@@ -269,10 +276,7 @@
CONFIG_MTD_NAND=m
# CONFIG_MTD_NAND_VERIFY_WRITE is not set
CONFIG_MTD_NAND_IDS=m
-CONFIG_MTD_NAND_DISKONCHIP=m
-# CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED is not set
-CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0
-# CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE is not set
+# CONFIG_MTD_NAND_DISKONCHIP is not set
# CONFIG_MTD_NAND_NANDSIM is not set
#
@@ -280,8 +284,7 @@
#
CONFIG_PARPORT=m
CONFIG_PARPORT_PC=m
-CONFIG_PARPORT_SERIAL=m
-CONFIG_PARPORT_PC_FIFO=y
+# CONFIG_PARPORT_PC_FIFO is not set
# CONFIG_PARPORT_PC_SUPERIO is not set
CONFIG_PARPORT_NOT_PC=y
# CONFIG_PARPORT_GSC is not set
@@ -290,13 +293,7 @@
#
# Plug and Play support
#
-CONFIG_PNP=y
-# CONFIG_PNP_DEBUG is not set
-
-#
-# Protocols
-#
-CONFIG_PNPACPI=y
+# CONFIG_PNP is not set
#
# Block devices
@@ -324,7 +321,7 @@
CONFIG_PARIDE_FIT2=m
CONFIG_PARIDE_FIT3=m
CONFIG_PARIDE_EPAT=m
-# CONFIG_PARIDE_EPATC8 is not set
+CONFIG_PARIDE_EPATC8=y
CONFIG_PARIDE_EPIA=m
CONFIG_PARIDE_FRIQ=m
CONFIG_PARIDE_FRPW=m
@@ -345,7 +342,7 @@
# CONFIG_BLK_DEV_UB is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_LBD=y
@@ -360,70 +357,69 @@
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
-# CONFIG_ATA_OVER_ETH is not set
+CONFIG_ATA_OVER_ETH=m
#
# ATA/ATAPI/MFM/RLL support
#
-CONFIG_IDE=m
-CONFIG_BLK_DEV_IDE=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
#
# Please see Documentation/ide.txt for help/info on IDE drives
#
# CONFIG_BLK_DEV_IDE_SATA is not set
# CONFIG_BLK_DEV_HD_IDE is not set
-CONFIG_BLK_DEV_IDEDISK=m
-# CONFIG_IDEDISK_MULTI_MODE is not set
-CONFIG_BLK_DEV_IDECD=m
-CONFIG_BLK_DEV_IDETAPE=m
-CONFIG_BLK_DEV_IDEFLOPPY=m
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+CONFIG_BLK_DEV_IDEFLOPPY=y
CONFIG_BLK_DEV_IDESCSI=m
# CONFIG_IDE_TASK_IOCTL is not set
#
# IDE chipset support/bugfixes
#
-CONFIG_IDE_GENERIC=m
+CONFIG_IDE_GENERIC=y
CONFIG_BLK_DEV_CMD640=y
-# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
-# CONFIG_BLK_DEV_IDEPNP is not set
+CONFIG_BLK_DEV_CMD640_ENHANCED=y
CONFIG_BLK_DEV_IDEPCI=y
CONFIG_IDEPCI_SHARE_IRQ=y
# CONFIG_BLK_DEV_OFFBOARD is not set
-CONFIG_BLK_DEV_GENERIC=m
-CONFIG_BLK_DEV_OPTI621=m
-CONFIG_BLK_DEV_RZ1000=m
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_RZ1000=y
CONFIG_BLK_DEV_IDEDMA_PCI=y
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set
-CONFIG_BLK_DEV_AEC62XX=m
-CONFIG_BLK_DEV_ALI15X3=m
+CONFIG_BLK_DEV_AEC62XX=y
+CONFIG_BLK_DEV_ALI15X3=y
# CONFIG_WDC_ALI15X3 is not set
-CONFIG_BLK_DEV_AMD74XX=m
-CONFIG_BLK_DEV_ATIIXP=m
-CONFIG_BLK_DEV_CMD64X=m
-CONFIG_BLK_DEV_TRIFLEX=m
-CONFIG_BLK_DEV_CY82C693=m
-CONFIG_BLK_DEV_CS5520=m
-CONFIG_BLK_DEV_CS5530=m
-CONFIG_BLK_DEV_HPT34X=m
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_ATIIXP=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_TRIFLEX=y
+CONFIG_BLK_DEV_CY82C693=y
+CONFIG_BLK_DEV_CS5520=y
+CONFIG_BLK_DEV_CS5530=y
+CONFIG_BLK_DEV_HPT34X=y
# CONFIG_HPT34X_AUTODMA is not set
-CONFIG_BLK_DEV_HPT366=m
-CONFIG_BLK_DEV_SC1200=m
-CONFIG_BLK_DEV_PIIX=m
-CONFIG_BLK_DEV_NS87415=m
-CONFIG_BLK_DEV_PDC202XX_OLD=m
-CONFIG_PDC202XX_BURST=y
-CONFIG_BLK_DEV_PDC202XX_NEW=m
+CONFIG_BLK_DEV_HPT366=y
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_NS87415 is not set
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+# CONFIG_PDC202XX_BURST is not set
+CONFIG_BLK_DEV_PDC202XX_NEW=y
CONFIG_PDC202XX_FORCE=y
-CONFIG_BLK_DEV_SVWKS=m
-CONFIG_BLK_DEV_SIIMAGE=m
-CONFIG_BLK_DEV_SIS5513=m
-CONFIG_BLK_DEV_SLC90E66=m
-CONFIG_BLK_DEV_TRM290=m
-CONFIG_BLK_DEV_VIA82CXXX=m
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+CONFIG_BLK_DEV_SLC90E66=y
+# CONFIG_BLK_DEV_TRM290 is not set
+CONFIG_BLK_DEV_VIA82CXXX=y
# CONFIG_IDE_ARM is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set
@@ -433,17 +429,17 @@
#
# SCSI device support
#
-CONFIG_SCSI=m
+CONFIG_SCSI=y
CONFIG_SCSI_PROC_FS=y
#
# SCSI support type (disk, tape, CD-ROM)
#
-CONFIG_BLK_DEV_SD=m
+CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
CONFIG_BLK_DEV_SR=m
-# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=m
#
@@ -458,7 +454,7 @@
#
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_FC_ATTRS=m
-# CONFIG_SCSI_ISCSI_ATTRS is not set
+CONFIG_SCSI_ISCSI_ATTRS=m
#
# SCSI low-level drivers
@@ -468,29 +464,30 @@
CONFIG_SCSI_ACARD=m
CONFIG_SCSI_AACRAID=m
CONFIG_SCSI_AIC7XXX=m
-CONFIG_AIC7XXX_CMDS_PER_DEVICE=8
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=4
CONFIG_AIC7XXX_RESET_DELAY_MS=15000
-CONFIG_AIC7XXX_DEBUG_ENABLE=y
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
CONFIG_AIC7XXX_DEBUG_MASK=0
-CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
CONFIG_SCSI_AIC7XXX_OLD=m
CONFIG_SCSI_AIC79XX=m
-CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_CMDS_PER_DEVICE=4
CONFIG_AIC79XX_RESET_DELAY_MS=15000
-CONFIG_AIC79XX_ENABLE_RD_STRM=y
-CONFIG_AIC79XX_DEBUG_ENABLE=y
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
CONFIG_AIC79XX_DEBUG_MASK=0
-CONFIG_AIC79XX_REG_PRETTY_PRINT=y
-CONFIG_SCSI_ADVANSYS=m
-# CONFIG_MEGARAID_NEWGEN is not set
-CONFIG_MEGARAID_LEGACY=m
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_ADVANSYS is not set
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
CONFIG_SCSI_SATA=y
CONFIG_SCSI_SATA_AHCI=m
CONFIG_SCSI_SATA_SVW=m
-CONFIG_SCSI_ATA_PIIX=m
+CONFIG_SCSI_ATA_PIIX=y
CONFIG_SCSI_SATA_NV=m
CONFIG_SCSI_SATA_PROMISE=m
-# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_QSTOR=m
CONFIG_SCSI_SATA_SX4=m
CONFIG_SCSI_SATA_SIL=m
CONFIG_SCSI_SATA_SIS=m
@@ -500,17 +497,14 @@
CONFIG_SCSI_BUSLOGIC=m
# CONFIG_SCSI_OMIT_FLASHPOINT is not set
# CONFIG_SCSI_CPQFCTS is not set
-CONFIG_SCSI_DMX3191D=m
-CONFIG_SCSI_EATA=m
-CONFIG_SCSI_EATA_TAGGED_QUEUE=y
-CONFIG_SCSI_EATA_LINKED_COMMANDS=y
-CONFIG_SCSI_EATA_MAX_TAGS=16
-CONFIG_SCSI_EATA_PIO=m
-CONFIG_SCSI_FUTURE_DOMAIN=m
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
CONFIG_SCSI_GDTH=m
CONFIG_SCSI_IPS=m
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
+CONFIG_SCSI_INITIO=m
+CONFIG_SCSI_INIA100=m
CONFIG_SCSI_PPA=m
CONFIG_SCSI_IMM=m
# CONFIG_SCSI_IZIP_EPP16 is not set
@@ -520,32 +514,29 @@
CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
-CONFIG_SCSI_IPR=m
-# CONFIG_SCSI_IPR_TRACE is not set
-# CONFIG_SCSI_IPR_DUMP is not set
+# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_PCI2000 is not set
# CONFIG_SCSI_PCI2220I is not set
-CONFIG_SCSI_QLOGIC_ISP=m
-CONFIG_SCSI_QLOGIC_FC=m
-CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
CONFIG_SCSI_QLOGIC_1280=m
CONFIG_SCSI_QLOGIC_1280_1040=y
-CONFIG_SCSI_QLA2XXX=m
-# CONFIG_SCSI_QLA21XX is not set
-# CONFIG_SCSI_QLA22XX is not set
-# CONFIG_SCSI_QLA2300 is not set
-# CONFIG_SCSI_QLA2322 is not set
-# CONFIG_SCSI_QLA6312 is not set
+CONFIG_SCSI_QLA2XXX=y
+CONFIG_SCSI_QLA21XX=m
+CONFIG_SCSI_QLA22XX=m
+CONFIG_SCSI_QLA2300=m
+CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA6312=m
CONFIG_SCSI_LPFC=m
CONFIG_SCSI_DC395x=m
CONFIG_SCSI_DC390T=m
-CONFIG_SCSI_DEBUG=m
+# CONFIG_SCSI_DEBUG is not set
#
# Multi-device support (RAID and LVM)
#
CONFIG_MD=y
-CONFIG_BLK_DEV_MD=m
+CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=m
CONFIG_MD_RAID0=m
CONFIG_MD_RAID1=m
@@ -579,7 +570,7 @@
# Subsystem Options
#
# CONFIG_IEEE1394_VERBOSEDEBUG is not set
-# CONFIG_IEEE1394_OUI_DB is not set
+CONFIG_IEEE1394_OUI_DB=y
CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y
CONFIG_IEEE1394_CONFIG_ROM_IP1394=y
@@ -618,9 +609,9 @@
#
# Networking options
#
-CONFIG_PACKET=m
+CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
-CONFIG_UNIX=m
+CONFIG_UNIX=y
CONFIG_NET_KEY=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
@@ -784,11 +775,6 @@
CONFIG_IP6_NF_RAW=m
#
-# DECnet: Netfilter Configuration
-#
-CONFIG_DECNET_NF_GRABULATOR=m
-
-#
# Bridge: Netfilter Configuration
#
CONFIG_BRIDGE_NF_EBTABLES=m
@@ -810,9 +796,9 @@
CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
-# CONFIG_BRIDGE_EBT_ULOG is not set
+CONFIG_BRIDGE_EBT_ULOG=m
CONFIG_XFRM=y
-CONFIG_XFRM_USER=m
+CONFIG_XFRM_USER=y
#
# SCTP Configuration (EXPERIMENTAL)
@@ -823,19 +809,18 @@
# CONFIG_SCTP_HMAC_NONE is not set
# CONFIG_SCTP_HMAC_SHA1 is not set
CONFIG_SCTP_HMAC_MD5=y
-CONFIG_ATM=y
-CONFIG_ATM_CLIP=y
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
# CONFIG_ATM_CLIP_NO_ICMP is not set
CONFIG_ATM_LANE=m
-CONFIG_ATM_MPOA=m
+# CONFIG_ATM_MPOA is not set
CONFIG_ATM_BR2684=m
# CONFIG_ATM_BR2684_IPFILTER is not set
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
-CONFIG_DECNET=m
-# CONFIG_DECNET_ROUTER is not set
+# CONFIG_DECNET is not set
CONFIG_LLC=y
-CONFIG_LLC2=m
+# CONFIG_LLC2 is not set
CONFIG_IPX=m
# CONFIG_IPX_INTERN is not set
CONFIG_ATALK=m
@@ -843,12 +828,10 @@
CONFIG_IPDDP=m
CONFIG_IPDDP_ENCAP=y
CONFIG_IPDDP_DECAP=y
-CONFIG_X25=m
-CONFIG_LAPB=m
-# CONFIG_NET_DIVERT is not set
-CONFIG_ECONET=m
-CONFIG_ECONET_AUNUDP=y
-CONFIG_ECONET_NATIVE=y
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+CONFIG_NET_DIVERT=y
+# CONFIG_ECONET is not set
CONFIG_WAN_ROUTER=m
#
@@ -880,9 +863,9 @@
CONFIG_NET_CLS_ROUTE=y
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
-# CONFIG_CLS_U32_PERF is not set
-# CONFIG_NET_CLS_IND is not set
-# CONFIG_CLS_U32_MARK is not set
+CONFIG_CLS_U32_PERF=y
+CONFIG_NET_CLS_IND=y
+CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_EMATCH=y
@@ -897,31 +880,12 @@
#
# Network testing
#
-CONFIG_NET_PKTGEN=m
+# CONFIG_NET_PKTGEN is not set
CONFIG_NETPOLL=y
# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NETPOLL_TRAP=y
CONFIG_NET_POLL_CONTROLLER=y
-CONFIG_HAMRADIO=y
-
-#
-# Packet Radio protocols
-#
-CONFIG_AX25=m
-# CONFIG_AX25_DAMA_SLAVE is not set
-CONFIG_NETROM=m
-CONFIG_ROSE=m
-
-#
-# AX.25 network device drivers
-#
-CONFIG_MKISS=m
-CONFIG_6PACK=m
-CONFIG_BPQETHER=m
-CONFIG_BAYCOM_SER_FDX=m
-CONFIG_BAYCOM_SER_HDX=m
-CONFIG_BAYCOM_PAR=m
-CONFIG_YAM=m
+# CONFIG_HAMRADIO is not set
CONFIG_IRDA=m
#
@@ -937,7 +901,7 @@
#
CONFIG_IRDA_CACHE_LAST_LSAP=y
CONFIG_IRDA_FAST_RR=y
-CONFIG_IRDA_DEBUG=y
+# CONFIG_IRDA_DEBUG is not set
#
# Infrared-port device drivers
@@ -1002,9 +966,9 @@
CONFIG_BT_HCIUART=m
CONFIG_BT_HCIUART_H4=y
CONFIG_BT_HCIUART_BCSP=y
-# CONFIG_BT_HCIUART_BCSP_TXCRC is not set
+CONFIG_BT_HCIUART_BCSP_TXCRC=y
CONFIG_BT_HCIBCM203X=m
-# CONFIG_BT_HCIBPA10X is not set
+CONFIG_BT_HCIBPA10X=m
CONFIG_BT_HCIBFUSB=m
CONFIG_BT_HCIVHCI=m
CONFIG_NETDEVICES=y
@@ -1012,21 +976,11 @@
CONFIG_BONDING=m
CONFIG_EQUALIZER=m
CONFIG_TUN=m
-CONFIG_NET_SB1000=m
#
# ARCnet devices
#
-CONFIG_ARCNET=m
-CONFIG_ARCNET_1201=m
-CONFIG_ARCNET_1051=m
-CONFIG_ARCNET_RAW=m
-# CONFIG_ARCNET_CAP is not set
-CONFIG_ARCNET_COM90xx=m
-CONFIG_ARCNET_COM90xxIO=m
-CONFIG_ARCNET_RIM_I=m
-CONFIG_ARCNET_COM20020=m
-CONFIG_ARCNET_COM20020_PCI=m
+# CONFIG_ARCNET is not set
#
# Ethernet (10 or 100Mbit)
@@ -1046,21 +1000,21 @@
CONFIG_DE2104X=m
CONFIG_TULIP=m
# CONFIG_TULIP_MWI is not set
-# CONFIG_TULIP_MMIO is not set
+CONFIG_TULIP_MMIO=y
# CONFIG_TULIP_NAPI is not set
CONFIG_DE4X5=m
CONFIG_WINBOND_840=m
CONFIG_DM9102=m
-CONFIG_HP100=m
+# CONFIG_HP100 is not set
CONFIG_NET_PCI=y
CONFIG_PCNET32=m
CONFIG_AMD8111_ETH=m
-# CONFIG_AMD8111E_NAPI is not set
+CONFIG_AMD8111E_NAPI=y
CONFIG_ADAPTEC_STARFIRE=m
-# CONFIG_ADAPTEC_STARFIRE_NAPI is not set
+CONFIG_ADAPTEC_STARFIRE_NAPI=y
CONFIG_B44=m
CONFIG_FORCEDETH=m
-# CONFIG_DGRS is not set
+CONFIG_DGRS=m
CONFIG_EEPRO100=m
CONFIG_E100=m
CONFIG_FEALNX=m
@@ -1069,7 +1023,7 @@
CONFIG_8139CP=m
CONFIG_8139TOO=m
CONFIG_8139TOO_PIO=y
-CONFIG_8139TOO_TUNE_TWISTER=y
+# CONFIG_8139TOO_TUNE_TWISTER is not set
CONFIG_8139TOO_8129=y
# CONFIG_8139_OLD_RX_RESET is not set
CONFIG_SIS900=m
@@ -1077,21 +1031,22 @@
CONFIG_SUNDANCE=m
# CONFIG_SUNDANCE_MMIO is not set
CONFIG_VIA_RHINE=m
-# CONFIG_VIA_RHINE_MMIO is not set
+CONFIG_VIA_RHINE_MMIO=y
#
# Ethernet (1000 Mbit)
#
-# CONFIG_ACENIC is not set
+CONFIG_ACENIC=m
+# CONFIG_ACENIC_OMIT_TIGON_I is not set
CONFIG_DL2K=m
CONFIG_E1000=m
-# CONFIG_E1000_NAPI is not set
+CONFIG_E1000_NAPI=y
CONFIG_NS83820=m
CONFIG_HAMACHI=m
CONFIG_YELLOWFIN=m
CONFIG_R8169=m
-# CONFIG_R8169_NAPI is not set
-# CONFIG_R8169_VLAN is not set
+CONFIG_R8169_NAPI=y
+CONFIG_R8169_VLAN=y
CONFIG_SK98LIN=m
CONFIG_VIA_VELOCITY=m
CONFIG_TIGON3=m
@@ -1101,9 +1056,9 @@
# Ethernet (10000 Mbit)
#
CONFIG_IXGB=m
-# CONFIG_IXGB_NAPI is not set
+CONFIG_IXGB_NAPI=y
CONFIG_S2IO=m
-# CONFIG_S2IO_NAPI is not set
+CONFIG_S2IO_NAPI=y
# CONFIG_2BUFF_MODE is not set
#
@@ -1124,7 +1079,7 @@
#
# Obsolete Wireless cards support (pre-802.11)
#
-CONFIG_STRIP=m
+# CONFIG_STRIP is not set
#
# Wireless 802.11b ISA/PCI cards support
@@ -1145,35 +1100,7 @@
#
# Wan interfaces
#
-CONFIG_WAN=y
-CONFIG_DSCC4=m
-CONFIG_DSCC4_PCISYNC=y
-CONFIG_DSCC4_PCI_RST=y
-CONFIG_LANMEDIA=m
-CONFIG_SYNCLINK_SYNCPPP=m
-CONFIG_HDLC=m
-CONFIG_HDLC_RAW=y
-CONFIG_HDLC_RAW_ETH=y
-CONFIG_HDLC_CISCO=y
-CONFIG_HDLC_FR=y
-CONFIG_HDLC_PPP=y
-CONFIG_HDLC_X25=y
-CONFIG_PCI200SYN=m
-CONFIG_WANXL=m
-CONFIG_PC300=m
-CONFIG_PC300_MLPPP=y
-CONFIG_FARSYNC=m
-CONFIG_DLCI=m
-CONFIG_DLCI_COUNT=24
-CONFIG_DLCI_MAX=8
-CONFIG_WAN_ROUTER_DRIVERS=y
-# CONFIG_VENDOR_SANGOMA is not set
-CONFIG_CYCLADES_SYNC=m
-CONFIG_CYCLOMX_X25=y
-CONFIG_LAPBETHER=m
-CONFIG_X25_ASY=m
-CONFIG_SBNI=m
-# CONFIG_SBNI_MULTILINE is not set
+# CONFIG_WAN is not set
#
# ATM drivers
@@ -1184,8 +1111,7 @@
# CONFIG_ATM_ENI_DEBUG is not set
# CONFIG_ATM_ENI_TUNE_BURST is not set
CONFIG_ATM_FIRESTREAM=m
-CONFIG_ATM_ZATM=m
-# CONFIG_ATM_ZATM_DEBUG is not set
+# CONFIG_ATM_ZATM is not set
CONFIG_ATM_IDT77252=m
# CONFIG_ATM_IDT77252_DEBUG is not set
# CONFIG_ATM_IDT77252_RCV_ALL is not set
@@ -1195,20 +1121,13 @@
CONFIG_ATM_HORIZON=m
# CONFIG_ATM_HORIZON_DEBUG is not set
CONFIG_ATM_FORE200E_MAYBE=m
-CONFIG_ATM_FORE200E_PCA=y
-CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y
-# CONFIG_ATM_FORE200E_USE_TASKLET is not set
-CONFIG_ATM_FORE200E_TX_RETRY=16
-CONFIG_ATM_FORE200E_DEBUG=0
-CONFIG_ATM_FORE200E=m
+# CONFIG_ATM_FORE200E_PCA is not set
CONFIG_ATM_HE=m
-CONFIG_ATM_HE_USE_SUNI=y
+# CONFIG_ATM_HE_USE_SUNI is not set
CONFIG_FDDI=y
-CONFIG_DEFXX=m
+# CONFIG_DEFXX is not set
CONFIG_SKFP=m
-CONFIG_HIPPI=y
-CONFIG_ROADRUNNER=m
-# CONFIG_ROADRUNNER_LARGE_RINGS is not set
+# CONFIG_HIPPI is not set
CONFIG_PLIP=m
CONFIG_PPP=m
CONFIG_PPP_MULTILINK=y
@@ -1216,15 +1135,15 @@
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
+# CONFIG_PPP_BSDCOMP is not set
CONFIG_PPPOE=m
CONFIG_PPPOATM=m
CONFIG_SLIP=m
CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP_SMART=y
-CONFIG_SLIP_MODE_SLIP6=y
+# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_NET_FC=y
-CONFIG_SHAPER=m
+# CONFIG_SHAPER is not set
CONFIG_NETCONSOLE=m
#
@@ -1240,16 +1159,15 @@
CONFIG_ISDN_PPP_VJ=y
CONFIG_ISDN_MPP=y
CONFIG_IPPP_FILTER=y
-CONFIG_ISDN_PPP_BSDCOMP=m
+# CONFIG_ISDN_PPP_BSDCOMP is not set
CONFIG_ISDN_AUDIO=y
CONFIG_ISDN_TTY_FAX=y
-CONFIG_ISDN_X25=y
#
# ISDN feature submodules
#
CONFIG_ISDN_DRV_LOOP=m
-# CONFIG_ISDN_DIVERSION is not set
+CONFIG_ISDN_DIVERSION=m
#
# ISDN4Linux hardware drivers
@@ -1265,9 +1183,9 @@
#
CONFIG_HISAX_EURO=y
CONFIG_DE_AOC=y
-# CONFIG_HISAX_NO_SENDCOMPLETE is not set
-# CONFIG_HISAX_NO_LLC is not set
-# CONFIG_HISAX_NO_KEYPAD is not set
+CONFIG_HISAX_NO_SENDCOMPLETE=y
+CONFIG_HISAX_NO_LLC=y
+CONFIG_HISAX_NO_KEYPAD=y
CONFIG_HISAX_1TR6=y
CONFIG_HISAX_NI1=y
CONFIG_HISAX_MAX_CARDS=8
@@ -1342,19 +1260,12 @@
#
# Active Eicon DIVA Server cards
#
-CONFIG_CAPI_EICON=y
-CONFIG_ISDN_DIVAS=m
-CONFIG_ISDN_DIVAS_BRIPCI=y
-CONFIG_ISDN_DIVAS_PRIPCI=y
-CONFIG_ISDN_DIVAS_DIVACAPI=m
-CONFIG_ISDN_DIVAS_USERIDI=m
-CONFIG_ISDN_DIVAS_MAINT=m
+# CONFIG_CAPI_EICON is not set
#
# Telephony Support
#
-CONFIG_PHONE=m
-CONFIG_PHONE_IXJ=m
+# CONFIG_PHONE is not set
#
# Input device support
@@ -1365,27 +1276,25 @@
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
CONFIG_INPUT_JOYDEV=m
-CONFIG_INPUT_TSDEV=m
-CONFIG_INPUT_TSDEV_SCREEN_X=240
-CONFIG_INPUT_TSDEV_SCREEN_Y=320
-CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_EVBUG=m
+# CONFIG_INPUT_TSDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
-CONFIG_KEYBOARD_SUNKBD=m
-CONFIG_KEYBOARD_LKKBD=m
-CONFIG_KEYBOARD_XTKBD=m
-CONFIG_KEYBOARD_NEWTON=m
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=m
+CONFIG_MOUSE_PS2=y
CONFIG_MOUSE_SERIAL=m
CONFIG_MOUSE_VSXXXAA=m
CONFIG_INPUT_JOYSTICK=y
@@ -1427,19 +1336,19 @@
#
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
-CONFIG_SERIO_SERPORT=m
-CONFIG_SERIO_CT82C710=m
-CONFIG_SERIO_PARKBD=m
-CONFIG_SERIO_PCIPS2=m
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PARKBD is not set
+# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
-CONFIG_SERIO_RAW=m
+# CONFIG_SERIO_RAW is not set
CONFIG_GAMEPORT=m
CONFIG_GAMEPORT_NS558=m
CONFIG_GAMEPORT_L4=m
CONFIG_GAMEPORT_EMU10K1=m
CONFIG_GAMEPORT_VORTEX=m
CONFIG_GAMEPORT_FM801=m
-# CONFIG_GAMEPORT_CS461X is not set
+CONFIG_GAMEPORT_CS461X=m
#
# Character devices
@@ -1452,21 +1361,16 @@
#
# Serial drivers
#
-CONFIG_SERIAL_8250=m
-# CONFIG_SERIAL_8250_ACPI is not set
-CONFIG_SERIAL_8250_NR_UARTS=4
-# CONFIG_SERIAL_8250_EXTENDED is not set
+# CONFIG_SERIAL_8250 is not set
#
# Non-8250 serial port support
#
-CONFIG_SERIAL_CORE=m
-CONFIG_SERIAL_JSM=m
+# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_LEGACY_PTYS is not set
CONFIG_PRINTER=m
-# CONFIG_LP_CONSOLE is not set
+CONFIG_LP_CONSOLE=y
CONFIG_PPDEV=m
CONFIG_TIPAR=m
@@ -1500,7 +1404,7 @@
CONFIG_WAFER_WDT=m
CONFIG_I8XX_TCO=m
CONFIG_SC1200_WDT=m
-CONFIG_60XX_WDT=m
+# CONFIG_60XX_WDT is not set
CONFIG_CPU5_WDT=m
CONFIG_W83627HF_WDT=m
CONFIG_W83877F_WDT=m
@@ -1518,67 +1422,31 @@
#
CONFIG_USBPCWATCHDOG=m
CONFIG_HW_RANDOM=m
-CONFIG_NVRAM=m
-CONFIG_RTC=m
-CONFIG_GEN_RTC=m
-CONFIG_GEN_RTC_X=y
+# CONFIG_NVRAM is not set
+CONFIG_RTC=y
CONFIG_DTLK=m
CONFIG_R3964=m
-CONFIG_APPLICOM=m
+# CONFIG_APPLICOM is not set
#
# Ftape, the floppy tape device driver
#
-CONFIG_FTAPE=m
-CONFIG_ZFTAPE=m
-CONFIG_ZFT_DFLT_BLK_SZ=10240
-
-#
-# The compressor will be built as a module only!
-#
-CONFIG_ZFT_COMPRESSOR=m
-CONFIG_FT_NR_BUFFERS=3
-CONFIG_FT_PROC_FS=y
-CONFIG_FT_NORMAL_DEBUG=y
-# CONFIG_FT_FULL_DEBUG is not set
-# CONFIG_FT_NO_TRACE is not set
-# CONFIG_FT_NO_TRACE_AT_ALL is not set
-
-#
-# Hardware configuration
-#
-CONFIG_FT_STD_FDC=y
-# CONFIG_FT_MACH2 is not set
-# CONFIG_FT_PROBE_FC10 is not set
-# CONFIG_FT_ALT_FDC is not set
-CONFIG_FT_FDC_THR=8
-CONFIG_FT_FDC_MAX_RATE=2000
-CONFIG_FT_ALPHA_CLOCK=0
-CONFIG_AGP=m
-CONFIG_AGP_AMD64=m
-CONFIG_AGP_INTEL=m
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
CONFIG_DRM=m
CONFIG_DRM_TDFX=m
# CONFIG_DRM_GAMMA is not set
CONFIG_DRM_R128=m
CONFIG_DRM_RADEON=m
-CONFIG_DRM_I810=m
-CONFIG_DRM_I830=m
-CONFIG_DRM_I915=m
-CONFIG_DRM_MGA=m
-CONFIG_DRM_SIS=m
-CONFIG_MWAVE=m
-CONFIG_RAW_DRIVER=m
+# CONFIG_MWAVE is not set
+# CONFIG_RAW_DRIVER is not set
# CONFIG_HPET is not set
-CONFIG_MAX_RAW_DEVS=256
CONFIG_HANGCHECK_TIMER=m
#
# TPM devices
#
-CONFIG_TCG_TPM=m
-CONFIG_TCG_NSC=m
-CONFIG_TCG_ATMEL=m
+# CONFIG_TCG_TPM is not set
#
# I2C support
@@ -1596,24 +1464,24 @@
#
# I2C Hardware Bus support
#
-CONFIG_I2C_ALI1535=m
-CONFIG_I2C_ALI1563=m
-CONFIG_I2C_ALI15X3=m
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
CONFIG_I2C_AMD756=m
CONFIG_I2C_AMD756_S4882=m
CONFIG_I2C_AMD8111=m
-CONFIG_I2C_I801=m
-CONFIG_I2C_I810=m
-CONFIG_I2C_PIIX4=m
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_PIIX4 is not set
CONFIG_I2C_ISA=m
CONFIG_I2C_NFORCE2=m
-CONFIG_I2C_PARPORT=m
-CONFIG_I2C_PARPORT_LIGHT=m
+# CONFIG_I2C_PARPORT is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
CONFIG_I2C_PROSAVAGE=m
CONFIG_I2C_SAVAGE4=m
-CONFIG_SCx200_ACB=m
-CONFIG_I2C_SIS5595=m
-CONFIG_I2C_SIS630=m
+# CONFIG_SCx200_ACB is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
CONFIG_I2C_SIS96X=m
CONFIG_I2C_STUB=m
CONFIG_I2C_VIA=m
@@ -1648,7 +1516,7 @@
CONFIG_SENSORS_LM92=m
CONFIG_SENSORS_MAX1619=m
CONFIG_SENSORS_PC87360=m
-# CONFIG_SENSORS_SMSC47B397 is not set
+CONFIG_SENSORS_SMSC47B397=m
CONFIG_SENSORS_SIS5595=m
CONFIG_SENSORS_SMSC47M1=m
CONFIG_SENSORS_VIA686A=m
@@ -1682,7 +1550,7 @@
#
# Misc devices
#
-CONFIG_IBM_ASM=m
+# CONFIG_IBM_ASM is not set
#
# Multimedia devices
@@ -1714,13 +1582,14 @@
CONFIG_VIDEO_ZORAN_LML33=m
CONFIG_VIDEO_ZORAN_LML33R10=m
# CONFIG_VIDEO_ZR36120 is not set
-# CONFIG_VIDEO_SAA7134 is not set
+CONFIG_VIDEO_SAA7134=m
+CONFIG_VIDEO_SAA7134_DVB=m
CONFIG_VIDEO_MXB=m
CONFIG_VIDEO_DPC=m
CONFIG_VIDEO_HEXIUM_ORION=m
CONFIG_VIDEO_HEXIUM_GEMINI=m
CONFIG_VIDEO_CX88=m
-# CONFIG_VIDEO_CX88_DVB is not set
+CONFIG_VIDEO_CX88_DVB=m
CONFIG_VIDEO_OVCAMCHIP=m
#
@@ -1740,7 +1609,7 @@
# Supported SAA7146 based PCI Adapters
#
CONFIG_DVB_AV7110=m
-# CONFIG_DVB_AV7110_OSD is not set
+CONFIG_DVB_AV7110_OSD=y
CONFIG_DVB_BUDGET=m
CONFIG_DVB_BUDGET_CI=m
CONFIG_DVB_BUDGET_AV=m
@@ -1755,7 +1624,12 @@
CONFIG_DVB_DIBUSB_MISDESIGNED_DEVICES=y
# CONFIG_DVB_DIBCOM_DEBUG is not set
CONFIG_DVB_CINERGYT2=m
-# CONFIG_DVB_CINERGYT2_TUNING is not set
+CONFIG_DVB_CINERGYT2_TUNING=y
+CONFIG_DVB_CINERGYT2_STREAM_URB_COUNT=32
+CONFIG_DVB_CINERGYT2_STREAM_BUF_SIZE=512
+CONFIG_DVB_CINERGYT2_QUERY_INTERVAL=250
+CONFIG_DVB_CINERGYT2_ENABLE_RC_INPUT_DEVICE=y
+CONFIG_DVB_CINERGYT2_RC_QUERY_INTERVAL=100
#
# Supported FlexCopII (B2C2) Adapters
@@ -1822,6 +1696,7 @@
CONFIG_VIDEO_VIDEOBUF=m
CONFIG_VIDEO_TUNER=m
CONFIG_VIDEO_BUF=m
+CONFIG_VIDEO_BUF_DVB=m
CONFIG_VIDEO_BTCX=m
CONFIG_VIDEO_IR=m
CONFIG_VIDEO_TVEEPROM=m
@@ -1830,36 +1705,34 @@
# Graphics support
#
CONFIG_FB=y
-CONFIG_FB_CFB_FILLRECT=m
-CONFIG_FB_CFB_COPYAREA=m
-CONFIG_FB_CFB_IMAGEBLIT=m
-CONFIG_FB_SOFT_CURSOR=m
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+CONFIG_FB_SOFT_CURSOR=y
# CONFIG_FB_MACMODES is not set
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_CIRRUS=m
-CONFIG_FB_PM2=m
-CONFIG_FB_PM2_FIFO_DISCONNECT=y
-CONFIG_FB_CYBER2000=m
+# CONFIG_FB_PM2 is not set
+# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
CONFIG_FB_VGA16=m
-# CONFIG_FB_VESA is not set
+CONFIG_FB_VESA=y
CONFIG_VIDEO_SELECT=y
-CONFIG_FB_HGA=m
-# CONFIG_FB_HGA_ACCEL is not set
-CONFIG_FB_NVIDIA=m
-CONFIG_FB_NVIDIA_I2C=y
+# CONFIG_FB_HGA is not set
+# CONFIG_FB_NVIDIA is not set
CONFIG_FB_RIVA=m
-CONFIG_FB_RIVA_I2C=y
-CONFIG_FB_RIVA_DEBUG=y
+# CONFIG_FB_RIVA_I2C is not set
+# CONFIG_FB_RIVA_DEBUG is not set
CONFIG_FB_MATROX=m
CONFIG_FB_MATROX_MILLENIUM=y
CONFIG_FB_MATROX_MYSTIQUE=y
-# CONFIG_FB_MATROX_G is not set
+CONFIG_FB_MATROX_G=y
CONFIG_FB_MATROX_I2C=m
+CONFIG_FB_MATROX_MAVEN=m
CONFIG_FB_MATROX_MULTIHEAD=y
-CONFIG_FB_RADEON_OLD=m
+# CONFIG_FB_RADEON_OLD is not set
CONFIG_FB_RADEON=m
CONFIG_FB_RADEON_I2C=y
# CONFIG_FB_RADEON_DEBUG is not set
@@ -1867,33 +1740,30 @@
CONFIG_FB_ATY=m
CONFIG_FB_ATY_CT=y
CONFIG_FB_ATY_GENERIC_LCD=y
-CONFIG_FB_ATY_XL_INIT=y
+# CONFIG_FB_ATY_XL_INIT is not set
CONFIG_FB_ATY_GX=y
CONFIG_FB_SAVAGE=m
CONFIG_FB_SAVAGE_I2C=y
CONFIG_FB_SAVAGE_ACCEL=y
-CONFIG_FB_SIS=m
-CONFIG_FB_SIS_300=y
-CONFIG_FB_SIS_315=y
+# CONFIG_FB_SIS is not set
CONFIG_FB_NEOMAGIC=m
CONFIG_FB_KYRO=m
CONFIG_FB_3DFX=m
-# CONFIG_FB_3DFX_ACCEL is not set
+CONFIG_FB_3DFX_ACCEL=y
CONFIG_FB_VOODOO1=m
CONFIG_FB_TRIDENT=m
-# CONFIG_FB_TRIDENT_ACCEL is not set
+CONFIG_FB_TRIDENT_ACCEL=y
# CONFIG_FB_PM3 is not set
-CONFIG_FB_GEODE=y
-CONFIG_FB_GEODE_GX1=m
-CONFIG_FB_S1D13XXX=m
-CONFIG_FB_VIRTUAL=m
+# CONFIG_FB_GEODE is not set
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=m
+CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
@@ -1901,8 +1771,15 @@
#
# Logo configuration
#
-# CONFIG_LOGO is not set
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_LOGO_LINUX_CLUT224=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=m
+CONFIG_BACKLIGHT_DEVICE=y
+CONFIG_LCD_CLASS_DEVICE=m
+CONFIG_LCD_DEVICE=y
#
# Sound
@@ -1936,7 +1813,7 @@
CONFIG_SND_DUMMY=m
CONFIG_SND_VIRMIDI=m
CONFIG_SND_MTPAV=m
-CONFIG_SND_SERIAL_U16550=m
+# CONFIG_SND_SERIAL_U16550 is not set
CONFIG_SND_MPU401=m
#
@@ -1956,8 +1833,8 @@
CONFIG_SND_CS46XX_NEW_DSP=y
CONFIG_SND_CS4281=m
CONFIG_SND_EMU10K1=m
-# CONFIG_SND_EMU10K1X is not set
-# CONFIG_SND_CA0106 is not set
+CONFIG_SND_EMU10K1X=m
+CONFIG_SND_CA0106=m
CONFIG_SND_KORG1212=m
CONFIG_SND_MIXART=m
CONFIG_SND_NM256=m
@@ -1982,7 +1859,7 @@
CONFIG_SND_INTEL8X0M=m
CONFIG_SND_SONICVIBES=m
CONFIG_SND_VIA82XX=m
-# CONFIG_SND_VIA82XX_MODEM is not set
+CONFIG_SND_VIA82XX_MODEM=m
CONFIG_SND_VX222=m
CONFIG_SND_HDA_INTEL=m
@@ -1995,75 +1872,7 @@
#
# Open Sound System
#
-CONFIG_SOUND_PRIME=m
-CONFIG_SOUND_BT878=m
-CONFIG_SOUND_CMPCI=m
-# CONFIG_SOUND_CMPCI_FM is not set
-# CONFIG_SOUND_CMPCI_MIDI is not set
-CONFIG_SOUND_CMPCI_JOYSTICK=y
-CONFIG_SOUND_EMU10K1=m
-CONFIG_MIDI_EMU10K1=y
-CONFIG_SOUND_FUSION=m
-CONFIG_SOUND_CS4281=m
-CONFIG_SOUND_ES1370=m
-CONFIG_SOUND_ES1371=m
-CONFIG_SOUND_ESSSOLO1=m
-CONFIG_SOUND_MAESTRO=m
-CONFIG_SOUND_MAESTRO3=m
-CONFIG_SOUND_ICH=m
-CONFIG_SOUND_SONICVIBES=m
-CONFIG_SOUND_TRIDENT=m
-# CONFIG_SOUND_MSNDCLAS is not set
-# CONFIG_SOUND_MSNDPIN is not set
-CONFIG_SOUND_VIA82CXXX=m
-CONFIG_MIDI_VIA82CXXX=y
-CONFIG_SOUND_OSS=m
-# CONFIG_SOUND_TRACEINIT is not set
-# CONFIG_SOUND_DMAP is not set
-# CONFIG_SOUND_AD1816 is not set
-CONFIG_SOUND_AD1889=m
-CONFIG_SOUND_SGALAXY=m
-CONFIG_SOUND_ADLIB=m
-CONFIG_SOUND_ACI_MIXER=m
-CONFIG_SOUND_CS4232=m
-CONFIG_SOUND_SSCAPE=m
-CONFIG_SOUND_GUS=m
-CONFIG_SOUND_GUS16=y
-CONFIG_SOUND_GUSMAX=y
-CONFIG_SOUND_VMIDI=m
-CONFIG_SOUND_TRIX=m
-CONFIG_SOUND_MSS=m
-CONFIG_SOUND_MPU401=m
-CONFIG_SOUND_NM256=m
-CONFIG_SOUND_MAD16=m
-CONFIG_MAD16_OLDCARD=y
-CONFIG_SOUND_PAS=m
-CONFIG_SOUND_PSS=m
-CONFIG_PSS_MIXER=y
-CONFIG_SOUND_SB=m
-# CONFIG_SOUND_AWE32_SYNTH is not set
-CONFIG_SOUND_WAVEFRONT=m
-CONFIG_SOUND_MAUI=m
-CONFIG_SOUND_YM3812=m
-CONFIG_SOUND_OPL3SA1=m
-CONFIG_SOUND_OPL3SA2=m
-CONFIG_SOUND_YMFPCI=m
-# CONFIG_SOUND_YMFPCI_LEGACY is not set
-CONFIG_SOUND_UART6850=m
-CONFIG_SOUND_AEDSP16=m
-CONFIG_SC6600=y
-CONFIG_SC6600_JOY=y
-CONFIG_SC6600_CDROM=4
-CONFIG_SC6600_CDROMBASE=0x0
-# CONFIG_AEDSP16_MSS is not set
-# CONFIG_AEDSP16_SBPRO is not set
-# CONFIG_AEDSP16_MPU401 is not set
-CONFIG_SOUND_TVMIXER=m
-CONFIG_SOUND_KAHLUA=m
-CONFIG_SOUND_ALI5455=m
-CONFIG_SOUND_FORTE=m
-CONFIG_SOUND_RME96XX=m
-CONFIG_SOUND_AD1980=m
+# CONFIG_SOUND_PRIME is not set
#
# USB support
@@ -2077,14 +1886,14 @@
# Miscellaneous USB options
#
CONFIG_USB_DEVICEFS=y
-CONFIG_USB_BANDWIDTH=y
+# CONFIG_USB_BANDWIDTH is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_OTG is not set
#
# USB Host Controller Drivers
#
-CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD=m
CONFIG_USB_EHCI_SPLIT_ISO=y
CONFIG_USB_EHCI_ROOT_HUB_TT=y
CONFIG_USB_OHCI_HCD=m
@@ -2096,7 +1905,7 @@
#
# USB Device Class drivers
#
-CONFIG_USB_AUDIO=m
+# CONFIG_USB_AUDIO is not set
#
# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
@@ -2122,16 +1931,13 @@
#
# USB Input Devices
#
-CONFIG_USB_HID=m
+CONFIG_USB_HID=y
CONFIG_USB_HIDINPUT=y
-# CONFIG_HID_FF is not set
+CONFIG_HID_FF=y
+CONFIG_HID_PID=y
+CONFIG_LOGITECH_FF=y
+CONFIG_THRUSTMASTER_FF=y
CONFIG_USB_HIDDEV=y
-
-#
-# USB HID Boot Protocol drivers
-#
-CONFIG_USB_KBD=y
-CONFIG_USB_MOUSE=y
CONFIG_USB_AIPTEK=m
CONFIG_USB_WACOM=m
CONFIG_USB_KBTAB=m
@@ -2150,7 +1956,7 @@
#
# USB Multimedia devices
#
-# CONFIG_USB_DABUSB is not set
+CONFIG_USB_DABUSB=m
CONFIG_USB_VICAM=m
CONFIG_USB_DSBR=m
CONFIG_USB_IBMCAM=m
@@ -2220,30 +2026,30 @@
CONFIG_USB_SERIAL_IR=m
CONFIG_USB_SERIAL_EDGEPORT=m
CONFIG_USB_SERIAL_EDGEPORT_TI=m
-# CONFIG_USB_SERIAL_GARMIN is not set
+CONFIG_USB_SERIAL_GARMIN=m
CONFIG_USB_SERIAL_IPW=m
CONFIG_USB_SERIAL_KEYSPAN_PDA=m
CONFIG_USB_SERIAL_KEYSPAN=m
-# CONFIG_USB_SERIAL_KEYSPAN_MPR is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA28XA is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA28XB is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA19QW is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA19QI is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA49W is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA49WLC is not set
+CONFIG_USB_SERIAL_KEYSPAN_MPR=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19=y
+CONFIG_USB_SERIAL_KEYSPAN_USA18X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
CONFIG_USB_SERIAL_KLSI=m
CONFIG_USB_SERIAL_KOBIL_SCT=m
CONFIG_USB_SERIAL_MCT_U232=m
CONFIG_USB_SERIAL_PL2303=m
CONFIG_USB_SERIAL_HP4X=m
CONFIG_USB_SERIAL_SAFE=m
-# CONFIG_USB_SERIAL_SAFE_PADDED is not set
-# CONFIG_USB_SERIAL_TI is not set
+CONFIG_USB_SERIAL_SAFE_PADDED=y
+CONFIG_USB_SERIAL_TI=m
CONFIG_USB_SERIAL_CYBERJACK=m
CONFIG_USB_SERIAL_XIRCOM=m
CONFIG_USB_SERIAL_OMNINET=m
@@ -2252,17 +2058,17 @@
#
# USB Miscellaneous drivers
#
-# CONFIG_USB_EMI62 is not set
+CONFIG_USB_EMI62=m
# CONFIG_USB_EMI26 is not set
CONFIG_USB_AUERSWALD=m
CONFIG_USB_RIO500=m
CONFIG_USB_LEGOTOWER=m
CONFIG_USB_LCD=m
CONFIG_USB_LED=m
-CONFIG_USB_CYTHERM=m
+# CONFIG_USB_CYTHERM is not set
CONFIG_USB_PHIDGETKIT=m
CONFIG_USB_PHIDGETSERVO=m
-# CONFIG_USB_IDMOUSE is not set
+CONFIG_USB_IDMOUSE=m
CONFIG_USB_SISUSBVGA=m
CONFIG_USB_TEST=m
@@ -2275,33 +2081,24 @@
#
# USB Gadget Support
#
-CONFIG_USB_GADGET=m
-# CONFIG_USB_GADGET_DEBUG_FILES is not set
-CONFIG_USB_GADGET_NET2280=y
-CONFIG_USB_NET2280=m
-# CONFIG_USB_GADGET_PXA2XX is not set
-# CONFIG_USB_GADGET_GOKU is not set
-# CONFIG_USB_GADGET_LH7A40X is not set
-# CONFIG_USB_GADGET_OMAP is not set
-# CONFIG_USB_GADGET_DUMMY_HCD is not set
-CONFIG_USB_GADGET_DUALSPEED=y
-CONFIG_USB_ZERO=m
-CONFIG_USB_ETH=m
-CONFIG_USB_ETH_RNDIS=y
-CONFIG_USB_GADGETFS=m
-CONFIG_USB_FILE_STORAGE=m
-# CONFIG_USB_FILE_STORAGE_TEST is not set
-CONFIG_USB_G_SERIAL=m
+# CONFIG_USB_GADGET is not set
#
# MMC/SD Card support
#
-# CONFIG_MMC is not set
+CONFIG_MMC=m
+# CONFIG_MMC_DEBUG is not set
+CONFIG_MMC_BLOCK=m
+CONFIG_MMC_WBSD=m
#
# InfiniBand support
#
-# CONFIG_INFINIBAND is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
#
# Power management options
@@ -2335,7 +2132,7 @@
#
# File systems
#
-CONFIG_EXT2_FS=m
+CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
@@ -2345,16 +2142,18 @@
CONFIG_EXT3_FS_SECURITY=y
CONFIG_JBD=m
# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=m
+CONFIG_FS_MBCACHE=y
CONFIG_REISERFS_FS=m
# CONFIG_REISERFS_CHECK is not set
-# CONFIG_REISERFS_PROC_INFO is not set
-# CONFIG_REISERFS_FS_XATTR is not set
+CONFIG_REISERFS_PROC_INFO=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
-# CONFIG_JFS_SECURITY is not set
+CONFIG_JFS_SECURITY=y
# CONFIG_JFS_DEBUG is not set
-CONFIG_JFS_STATISTICS=y
+# CONFIG_JFS_STATISTICS is not set
CONFIG_FS_POSIX_ACL=y
#
@@ -2362,15 +2161,15 @@
#
CONFIG_XFS_FS=m
CONFIG_XFS_EXPORT=y
-CONFIG_XFS_RT=y
+# CONFIG_XFS_RT is not set
CONFIG_XFS_QUOTA=y
CONFIG_XFS_SECURITY=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_MINIX_FS=m
CONFIG_ROMFS_FS=m
CONFIG_QUOTA=y
-CONFIG_QFMT_V1=m
-CONFIG_QFMT_V2=m
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
CONFIG_QUOTACTL=y
CONFIG_DNOTIFY=y
CONFIG_AUTOFS_FS=m
@@ -2379,10 +2178,10 @@
#
# CD-ROM/DVD Filesystems
#
-CONFIG_ISO9660_FS=m
+CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
-CONFIG_ZISOFS_FS=m
+CONFIG_ZISOFS_FS=y
CONFIG_UDF_FS=m
CONFIG_UDF_NLS=y
@@ -2393,10 +2192,8 @@
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
-CONFIG_NTFS_FS=m
-# CONFIG_NTFS_DEBUG is not set
-# CONFIG_NTFS_RW is not set
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+# CONFIG_NTFS_FS is not set
#
# Pseudo filesystems
@@ -2404,9 +2201,7 @@
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_SYSFS=y
-CONFIG_DEVFS_FS=y
-# CONFIG_DEVFS_MOUNT is not set
-# CONFIG_DEVFS_DEBUG is not set
+# CONFIG_DEVFS_FS is not set
CONFIG_DEVPTS_FS_XATTR=y
CONFIG_DEVPTS_FS_SECURITY=y
CONFIG_TMPFS=y
@@ -2419,8 +2214,7 @@
#
# Miscellaneous filesystems
#
-CONFIG_ADFS_FS=m
-# CONFIG_ADFS_FS_RW is not set
+# CONFIG_ADFS_FS is not set
CONFIG_AFFS_FS=m
CONFIG_HFS_FS=m
CONFIG_HFSPLUS_FS=m
@@ -2428,20 +2222,18 @@
# CONFIG_BEFS_DEBUG is not set
CONFIG_BFS_FS=m
CONFIG_EFS_FS=m
-CONFIG_JFFS_FS=m
-CONFIG_JFFS_FS_VERBOSE=0
-CONFIG_JFFS_PROC_FS=y
+# CONFIG_JFFS_FS is not set
CONFIG_JFFS2_FS=m
CONFIG_JFFS2_FS_DEBUG=0
-# CONFIG_JFFS2_FS_NAND is not set
+CONFIG_JFFS2_FS_NAND=y
# CONFIG_JFFS2_FS_NOR_ECC is not set
# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
CONFIG_JFFS2_ZLIB=y
CONFIG_JFFS2_RTIME=y
# CONFIG_JFFS2_RUBIN is not set
-CONFIG_CRAMFS=y
+CONFIG_CRAMFS=m
CONFIG_VXFS_FS=m
-CONFIG_HPFS_FS=m
+# CONFIG_HPFS_FS is not set
CONFIG_QNX4FS_FS=m
# CONFIG_QNX4FS_RW is not set
CONFIG_SYSV_FS=m
@@ -2470,7 +2262,8 @@
# CONFIG_SMB_NLS_DEFAULT is not set
CONFIG_CIFS=m
# CONFIG_CIFS_STATS is not set
-# CONFIG_CIFS_XATTR is not set
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
# CONFIG_CIFS_EXPERIMENTAL is not set
CONFIG_NCP_FS=m
CONFIG_NCPFS_PACKET_SIGNING=y
@@ -2478,38 +2271,29 @@
CONFIG_NCPFS_STRONG=y
CONFIG_NCPFS_NFS_NS=y
CONFIG_NCPFS_OS2_NS=y
-# CONFIG_NCPFS_SMALLDOS is not set
+CONFIG_NCPFS_SMALLDOS=y
CONFIG_NCPFS_NLS=y
CONFIG_NCPFS_EXTRAS=y
-CONFIG_CODA_FS=m
-# CONFIG_CODA_FS_OLD_API is not set
-CONFIG_AFS_FS=m
-CONFIG_RXRPC=m
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
#
# Partition Types
#
CONFIG_PARTITION_ADVANCED=y
-CONFIG_ACORN_PARTITION=y
-CONFIG_ACORN_PARTITION_CUMANA=y
-# CONFIG_ACORN_PARTITION_EESOX is not set
-CONFIG_ACORN_PARTITION_ICS=y
-# CONFIG_ACORN_PARTITION_ADFS is not set
-# CONFIG_ACORN_PARTITION_POWERTEC is not set
-CONFIG_ACORN_PARTITION_RISCIX=y
+# CONFIG_ACORN_PARTITION is not set
CONFIG_OSF_PARTITION=y
CONFIG_AMIGA_PARTITION=y
-CONFIG_ATARI_PARTITION=y
+# CONFIG_ATARI_PARTITION is not set
CONFIG_MAC_PARTITION=y
CONFIG_MSDOS_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_LDM_PARTITION=y
-# CONFIG_LDM_DEBUG is not set
+# CONFIG_LDM_PARTITION is not set
CONFIG_SGI_PARTITION=y
-CONFIG_ULTRIX_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
CONFIG_SUN_PARTITION=y
CONFIG_EFI_PARTITION=y
@@ -2517,8 +2301,8 @@
# Native Language Support
#
CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="cp437"
-CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_737=m
CONFIG_NLS_CODEPAGE_775=m
CONFIG_NLS_CODEPAGE_850=m
@@ -2541,7 +2325,7 @@
CONFIG_NLS_ISO8859_8=m
CONFIG_NLS_CODEPAGE_1250=m
CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
+CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=m
CONFIG_NLS_ISO8859_2=m
CONFIG_NLS_ISO8859_3=m
@@ -2561,15 +2345,15 @@
# Security options
#
CONFIG_KEYS=y
-# CONFIG_KEYS_DEBUG_PROC_KEYS is not set
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
-# CONFIG_SECURITY_NETWORK is not set
-CONFIG_SECURITY_CAPABILITIES=m
-CONFIG_SECURITY_ROOTPLUG=m
-CONFIG_SECURITY_SECLVL=m
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_CAPABILITIES=y
+# CONFIG_SECURITY_ROOTPLUG is not set
+# CONFIG_SECURITY_SECLVL is not set
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_SECURITY_SELINUX_DEVELOP=y
CONFIG_SECURITY_SELINUX_AVC_STATS=y
@@ -2582,8 +2366,8 @@
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_SHA1=m
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=y
CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_WP512=m
@@ -2602,7 +2386,7 @@
CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_CRC32C=m
-CONFIG_CRYPTO_TEST=m
+# CONFIG_CRYPTO_TEST is not set
#
# Hardware crypto devices
@@ -2616,8 +2400,6 @@
CONFIG_LIBCRC32C=m
CONFIG_ZLIB_INFLATE=y
CONFIG_ZLIB_DEFLATE=m
-CONFIG_REED_SOLOMON=m
-CONFIG_REED_SOLOMON_DEC16=y
#
# Kernel hacking
@@ -2625,7 +2407,7 @@
# CONFIG_PRINTK_TIME is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_LOG_BUF_SHIFT=15
# CONFIG_SCHEDSTATS is not set
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_SPINLOCK is not set
@@ -2640,5 +2422,4 @@
# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
-# CONFIG_CHECKING is not set
# CONFIG_INIT_DEBUG is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Thu Aug 25 22:53:20 2005
@@ -533,6 +533,11 @@
with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
/dev/cpu/31/cpuid.
+config SWIOTLB
+ bool
+ depends on PCI
+ default y
+
source "drivers/firmware/Kconfig"
choice
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Thu Aug 25
22:53:20 2005
@@ -14,8 +14,7 @@
c-obj-y := semaphore.o vm86.o \
ptrace.o sys_i386.o \
- i387.o dmi_scan.o bootflag.o \
- doublefault.o
+ i387.o dmi_scan.o bootflag.o
s-obj-y :=
obj-y += cpu/
@@ -44,6 +43,7 @@
c-obj-$(CONFIG_EFI) += efi.o efi_stub.o
c-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
EXTRA_AFLAGS := -traditional
@@ -84,7 +84,7 @@
$(obj)/vsyscall-sysenter.o FORCE
$(call if_changed,syscall)
-c-link := init_task.o
+c-link :=
s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o
vsyscall.lds.o syscall_table.o
$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst
%.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
@@ -92,6 +92,7 @@
$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
+EXTRA_AFLAGS += -I$(obj)
$(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S
obj-y += $(c-obj-y) $(s-obj-y)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c Thu Aug 25
22:53:20 2005
@@ -19,11 +19,13 @@
#include "cpu.h"
+#ifndef CONFIG_XEN
DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+#endif
static int cachesize_override __initdata = -1;
static int disable_x86_fxsr __initdata = 0;
@@ -569,7 +571,7 @@
for (va = gdt_descr->address, f = 0;
va < gdt_descr->address + gdt_descr->size;
va += PAGE_SIZE, f++) {
- frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ frames[f] = virt_to_mfn(va);
make_page_readonly((void *)va);
}
if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Thu Aug 25 22:53:20 2005
@@ -136,9 +136,6 @@
ENTRY(empty_zero_page)
.org 0x2000
-ENTRY(swapper_pg_dir)
-
-.org 0x3000
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* 0x0b reserved */
@@ -190,10 +187,10 @@
.quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault
TSS */
.fill GDT_ENTRIES-32,8,0
-.org 0x4000
+.org 0x3000
ENTRY(default_ldt)
-.org 0x5000
+.org 0x4000
/*
* Real beginning of normal "text" segment
*/
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Thu Aug 25
22:53:20 2005
@@ -115,9 +115,6 @@
EXPORT_SYMBOL(__copy_to_user_ll);
EXPORT_SYMBOL(strnlen_user);
-EXPORT_SYMBOL(dma_alloc_coherent);
-EXPORT_SYMBOL(dma_free_coherent);
-
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c Thu Aug 25
22:53:20 2005
@@ -80,7 +80,7 @@
t->io_bitmap_ptr = bitmap;
op.cmd = PHYSDEVOP_SET_IOBITMAP;
- op.u.set_iobitmap.bitmap = (unsigned long)bitmap;
+ op.u.set_iobitmap.bitmap = (char *)bitmap;
op.u.set_iobitmap.nr_ports = IO_BITMAP_BITS;
HYPERVISOR_physdev_op(&op);
}
@@ -113,16 +113,12 @@
if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
return -EPERM;
- /* Maintain OS privileges even if user attempts to relinquish them. */
- if (new_io_pl == 0)
- new_io_pl = 1;
-
/* Change our version of the privilege levels. */
current->thread.io_pl = new_io_pl;
/* Force the change at ring 0. */
op.cmd = PHYSDEVOP_SET_IOPL;
- op.u.set_iopl.iopl = new_io_pl;
+ op.u.set_iopl.iopl = (new_io_pl == 0) ? 1 : new_io_pl;
HYPERVISOR_physdev_op(&op);
return 0;
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c Thu Aug 25 22:53:20 2005
@@ -198,7 +198,7 @@
{
struct mm_struct * mm = current->mm;
__u32 entry_1, entry_2, *lp;
- unsigned long mach_lp;
+ maddr_t mach_lp;
int error;
struct user_desc ldt_info;
@@ -245,7 +245,8 @@
/* Install the new entry ... */
install:
- error = HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2);
+ error = HYPERVISOR_update_descriptor(
+ mach_lp, (u64)entry_1 | ((u64)entry_2<<32));
out_unlock:
up(&mm->context.sem);
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c Thu Aug 25
22:53:20 2005
@@ -784,7 +784,9 @@
void __init find_smp_config (void)
{
+#ifndef CONFIG_XEN
unsigned int address;
+#endif
/*
* FIXME: Linux assumes you have 640K of base ram..
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Thu Aug 25
22:53:20 2005
@@ -23,6 +23,104 @@
int flags;
unsigned long *bitmap;
};
+
+/*
+ * If 'test' is true, print an alert suggesting the 'swiotlb=force' boot
+ * option and then BUG(). The condition is wrapped in unlikely().
+ */
+#define IOMMU_BUG_ON(test) \
+do { \
+ if (unlikely(test)) { \
+ printk(KERN_ALERT "Fatal DMA error! " \
+ "Please use 'swiotlb=force'\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+/*
+ * Map a scatter-gather list for DMA.
+ *
+ * When swiotlb is enabled the whole request is handed to swiotlb_map_sg().
+ * Otherwise each entry's dma_address is set to page_to_phys(page) + offset
+ * and dma_length to length; an entry whose address needs mapping triggers
+ * IOMMU_BUG_ON.
+ *
+ * Returns the value from swiotlb_map_sg(), or nents on the direct path.
+ */
+int
+dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+	   enum dma_data_direction direction)
+{
+	int i, rc;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if (swiotlb) {
+		rc = swiotlb_map_sg(hwdev, sg, nents, direction);
+	} else {
+		for (i = 0; i < nents; i++ ) {
+			/* Validate the entry before deriving an address from it. */
+			BUG_ON(!sg[i].page);
+			sg[i].dma_address =
+				page_to_phys(sg[i].page) + sg[i].offset;
+			sg[i].dma_length = sg[i].length;
+			IOMMU_BUG_ON(address_needs_mapping(
+				hwdev, sg[i].dma_address));
+		}
+		rc = nents;
+	}
+
+	flush_write_buffers();
+	return rc;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+/*
+ * Undo dma_map_sg(). Only the swiotlb path has anything to release;
+ * without swiotlb this is a no-op apart from the direction check.
+ */
+void
+dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+	     enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	if (!swiotlb)
+		return;
+
+	swiotlb_unmap_sg(hwdev, sg, nents, direction);
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+/*
+ * Map 'size' bytes at 'offset' within 'page' for DMA and return the bus
+ * address. With swiotlb the request goes to swiotlb_map_page(); otherwise
+ * the address is page_to_phys(page) + offset, and IOMMU_BUG_ON fires if
+ * that address needs mapping for 'dev'.
+ */
+dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+	     size_t size, enum dma_data_direction direction)
+{
+	dma_addr_t bus;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if (swiotlb)
+		return swiotlb_map_page(dev, page, offset, size, direction);
+
+	bus = page_to_phys(page) + offset;
+	IOMMU_BUG_ON(address_needs_mapping(dev, bus));
+	return bus;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+/*
+ * Undo dma_map_page(). Forwards to swiotlb_unmap_page() when swiotlb is
+ * in use; otherwise only the direction is checked.
+ */
+void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	if (!swiotlb)
+		return;
+
+	swiotlb_unmap_page(dev, dma_address, size, direction);
+}
+EXPORT_SYMBOL(dma_unmap_page);
+
+/*
+ * Report whether 'dma_addr' denotes a failed mapping. Only swiotlb can
+ * report a failure here; the direct path always returns 0.
+ */
+int
+dma_mapping_error(dma_addr_t dma_addr)
+{
+	return swiotlb ? swiotlb_dma_mapping_error(dma_addr) : 0;
+}
+EXPORT_SYMBOL(dma_mapping_error);
+
+/*
+ * Tell the caller whether DMA with the given mask is supported for 'dev'.
+ * Defers to swiotlb_dma_supported() when swiotlb is enabled.
+ */
+int
+dma_supported(struct device *dev, u64 mask)
+{
+	if (!swiotlb) {
+		/*
+		 * By default we'll BUG when an infeasible DMA is requested,
+		 * and request swiotlb=force (see IOMMU_BUG_ON).
+		 */
+		return 1;
+	}
+
+	return swiotlb_dma_supported(dev, mask);
+}
+EXPORT_SYMBOL(dma_supported);
void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, unsigned int __nocast gfp)
@@ -54,13 +152,14 @@
ret = (void *)vstart;
if (ret != NULL) {
- xen_contig_memory(vstart, order);
+ xen_create_contiguous_region(vstart, order);
memset(ret, 0, size);
*dma_handle = virt_to_bus(ret);
}
return ret;
}
+EXPORT_SYMBOL(dma_alloc_coherent);
void dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle)
@@ -72,9 +171,12 @@
int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
bitmap_release_region(mem->bitmap, page, order);
- } else
+ } else {
+ xen_destroy_contiguous_region((unsigned long)vaddr, order);
free_pages((unsigned long)vaddr, order);
-}
+ }
+}
+EXPORT_SYMBOL(dma_free_coherent);
int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
dma_addr_t device_addr, size_t size, int flags)
@@ -153,46 +255,20 @@
}
EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-static LIST_HEAD(dma_map_head);
-static DEFINE_SPINLOCK(dma_map_lock);
-struct dma_map_entry {
- struct list_head list;
- dma_addr_t dma;
- char *bounce, *host;
- size_t size;
-};
-#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d)))
-
dma_addr_t
dma_map_single(struct device *dev, void *ptr, size_t size,
enum dma_data_direction direction)
{
- struct dma_map_entry *ent;
- void *bnc;
dma_addr_t dma;
- unsigned long flags;
-
- BUG_ON(direction == DMA_NONE);
-
- /*
- * Even if size is sub-page, the buffer may still straddle a page
- * boundary. Take into account buffer start offset. All other calls are
- * conservative and always search the dma_map list if it's non-empty.
- */
- if ((((unsigned int)ptr & ~PAGE_MASK) + size) <= PAGE_SIZE) {
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (swiotlb) {
+ dma = swiotlb_map_single(dev, ptr, size, direction);
+ } else {
dma = virt_to_bus(ptr);
- } else {
- BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, 0)) == NULL);
- BUG_ON((ent = kmalloc(sizeof(*ent), GFP_KERNEL)) == NULL);
- if (direction != DMA_FROM_DEVICE)
- memcpy(bnc, ptr, size);
- ent->dma = dma;
- ent->bounce = bnc;
- ent->host = ptr;
- ent->size = size;
- spin_lock_irqsave(&dma_map_lock, flags);
- list_add(&ent->list, &dma_map_head);
- spin_unlock_irqrestore(&dma_map_lock, flags);
+ IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
+ IOMMU_BUG_ON(address_needs_mapping(dev, dma));
}
flush_write_buffers();
@@ -204,30 +280,9 @@
dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction direction)
{
- struct dma_map_entry *ent;
- unsigned long flags;
-
- BUG_ON(direction == DMA_NONE);
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list ) {
- if (DMA_MAP_MATCHES(ent, dma_addr)) {
- list_del(&ent->list);
- break;
- }
- }
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- BUG_ON(dma_addr != ent->dma);
- BUG_ON(size != ent->size);
- if (direction != DMA_TO_DEVICE)
- memcpy(ent->host, ent->bounce, size);
- dma_free_coherent(dev, size, ent->bounce, ent->dma);
- kfree(ent);
- }
- }
+ BUG_ON(direction == DMA_NONE);
+ if (swiotlb)
+ swiotlb_unmap_single(dev, dma_addr, size, direction);
}
EXPORT_SYMBOL(dma_unmap_single);
@@ -235,23 +290,8 @@
dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
enum dma_data_direction direction)
{
- struct dma_map_entry *ent;
- unsigned long flags, off;
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list )
- if (DMA_MAP_MATCHES(ent, dma_handle))
- break;
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- off = dma_handle - ent->dma;
- BUG_ON((off + size) > ent->size);
- /*if (direction != DMA_TO_DEVICE)*/
- memcpy(ent->host+off, ent->bounce+off, size);
- }
- }
+ if (swiotlb)
+ swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_for_cpu);
@@ -259,24 +299,17 @@
dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
enum dma_data_direction direction)
{
- struct dma_map_entry *ent;
- unsigned long flags, off;
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list )
- if (DMA_MAP_MATCHES(ent, dma_handle))
- break;
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- off = dma_handle - ent->dma;
- BUG_ON((off + size) > ent->size);
- /*if (direction != DMA_FROM_DEVICE)*/
- memcpy(ent->bounce+off, ent->host+off, size);
- }
- }
-
- flush_write_buffers();
+ if (swiotlb)
+ swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_for_device);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Thu Aug 25
22:53:20 2005
@@ -115,20 +115,12 @@
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- /* We shouldn't have to disable interrupts while dead, but
- * some interrupts just don't seem to go away, and this makes
- * it "work" for testing purposes. */
/* Death loop */
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
HYPERVISOR_yield();
- local_irq_disable();
__flush_tlb_all();
cpu_set(smp_processor_id(), cpu_online_map);
- local_irq_enable();
}
#else
static inline void play_dead(void)
@@ -156,12 +148,19 @@
rmb();
if (cpu_is_offline(cpu)) {
+ local_irq_disable();
#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
+ /* Ack it. From this point on until
+ we get woken up, we're not allowed
+ to take any locks. In particular,
+ don't printk. */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
/* Tell hypervisor to take vcpu down. */
HYPERVISOR_vcpu_down(cpu);
#endif
play_dead();
- }
+ local_irq_enable();
+ }
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
xen_idle();
@@ -523,23 +522,22 @@
* Load the per-thread Thread-Local Storage descriptor.
* This is load_TLS(next, cpu) with multicalls.
*/
-#define C(i) do { \
- if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \
- next->tls_array[i].b != prev->tls_array[i].b)) { \
- mcl->op = __HYPERVISOR_update_descriptor; \
- mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu) \
- [GDT_ENTRY_TLS_MIN + i]); \
- mcl->args[1] = ((u32 *)&next->tls_array[i])[0]; \
- mcl->args[2] = ((u32 *)&next->tls_array[i])[1]; \
- mcl++; \
- } \
+#define C(i) do { \
+ if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \
+ next->tls_array[i].b != prev->tls_array[i].b)) { \
+ mcl->op = __HYPERVISOR_update_descriptor; \
+ *(u64 *)&mcl->args[0] = virt_to_machine( \
+ &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
+ *(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i]; \
+ mcl++; \
+ } \
} while (0)
C(0); C(1); C(2);
#undef C
if (unlikely(prev->io_pl != next->io_pl)) {
iopl_op.cmd = PHYSDEVOP_SET_IOPL;
- iopl_op.u.set_iopl.iopl = next->io_pl;
+ iopl_op.u.set_iopl.iopl = (next->io_pl == 0) ? 1 : next->io_pl;
mcl->op = __HYPERVISOR_physdev_op;
mcl->args[0] = (unsigned long)&iopl_op;
mcl++;
@@ -549,7 +547,7 @@
iobmp_op.cmd =
PHYSDEVOP_SET_IOBITMAP;
iobmp_op.u.set_iobitmap.bitmap =
- (unsigned long)next->io_bitmap_ptr;
+ (char *)next->io_bitmap_ptr;
iobmp_op.u.set_iobitmap.nr_ports =
next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
mcl->op = __HYPERVISOR_physdev_op;
@@ -791,3 +789,10 @@
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
+
+
+#ifndef CONFIG_X86_SMP
+/* Empty stub: defines _restore_vcpu when CONFIG_X86_SMP is not set. */
+void _restore_vcpu(void)
+{
+}
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Thu Aug 25 22:53:20 2005
@@ -1575,19 +1575,20 @@
/* Make sure we have a correctly sized P->M table. */
if (max_pfn != xen_start_info.nr_pages) {
phys_to_machine_mapping = alloc_bootmem_low_pages(
- max_pfn * sizeof(unsigned long));
+ max_pfn * sizeof(unsigned int));
if (max_pfn > xen_start_info.nr_pages) {
/* set to INVALID_P2M_ENTRY */
memset(phys_to_machine_mapping, ~0,
- max_pfn * sizeof(unsigned long));
+ max_pfn * sizeof(unsigned int));
memcpy(phys_to_machine_mapping,
- (unsigned long *)xen_start_info.mfn_list,
- xen_start_info.nr_pages * sizeof(unsigned long));
+ (unsigned int *)xen_start_info.mfn_list,
+ xen_start_info.nr_pages * sizeof(unsigned int));
} else {
memcpy(phys_to_machine_mapping,
- (unsigned long *)xen_start_info.mfn_list,
- max_pfn * sizeof(unsigned long));
+ (unsigned int *)xen_start_info.mfn_list,
+ max_pfn * sizeof(unsigned int));
+ /* N.B. below relies on sizeof(int) == sizeof(long). */
if (HYPERVISOR_dom_mem_op(
MEMOP_decrease_reservation,
(unsigned long *)xen_start_info.mfn_list +
max_pfn,
@@ -1597,18 +1598,17 @@
free_bootmem(
__pa(xen_start_info.mfn_list),
PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
- sizeof(unsigned long))));
+ sizeof(unsigned int))));
}
pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
- for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+ for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned int)), j++ )
{
pfn_to_mfn_frame_list[j] =
- virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+ virt_to_mfn(&phys_to_machine_mapping[i]);
}
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
- virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
-
+ virt_to_mfn(pfn_to_mfn_frame_list);
/*
* NOTE: at this point the bootmem allocator is fully available.
@@ -1636,7 +1636,7 @@
efi_map_memmap();
op.cmd = PHYSDEVOP_SET_IOPL;
- op.u.set_iopl.iopl = current->thread.io_pl = 1;
+ op.u.set_iopl.iopl = 1;
HYPERVISOR_physdev_op(&op);
#ifdef CONFIG_ACPI_BOOT
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Thu Aug 25
22:53:20 2005
@@ -62,6 +62,8 @@
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
+#include <asm-xen/evtchn.h>
+
/* Set if we find a B stepping CPU */
static int __initdata smp_b_stepping;
@@ -129,15 +131,7 @@
*/
void __init smp_alloc_memory(void)
{
-#if 1
- int cpu;
-
- for (cpu = 1; cpu < NR_CPUS; cpu++) {
- cpu_gdt_descr[cpu].address = (unsigned long)
- alloc_bootmem_low_pages(PAGE_SIZE);
- /* XXX free unused pages later */
- }
-#else
+#if 0
trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
/*
* Has to be in very low memory so we can execute
@@ -859,8 +853,8 @@
atomic_set(&init_deasserted, 0);
#if 1
- if (cpu_gdt_descr[0].size > PAGE_SIZE)
- BUG();
+ cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL);
+ BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
printk("GDT: copying %d bytes from %lx to %lx\n",
cpu_gdt_descr[0].size, cpu_gdt_descr[0].address,
@@ -878,7 +872,8 @@
ctxt.user_regs.cs = __KERNEL_CS;
ctxt.user_regs.eip = start_eip;
ctxt.user_regs.esp = idle->thread.esp;
- ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+#define X86_EFLAGS_IOPL_RING1 0x1000
+ ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1;
/* FPU is set up to default initial state. */
memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
@@ -901,7 +896,7 @@
for (va = cpu_gdt_descr[cpu].address, f = 0;
va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
va += PAGE_SIZE, f++) {
- ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ ctxt.gdt_frames[f] = virt_to_mfn(va);
make_page_readonly((void *)va);
}
ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
@@ -917,10 +912,11 @@
ctxt.failsafe_callback_cs = __KERNEL_CS;
ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
- ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(swapper_pg_dir);
+ ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
- printk("boot error: %ld\n", boot_error);
+ if (boot_error)
+ printk("boot error: %ld\n", boot_error);
if (!boot_error) {
/*
@@ -1321,14 +1317,127 @@
}
#ifdef CONFIG_HOTPLUG_CPU
-#include <asm-xen/ctrl_if.h>
-
+#include <asm-xen/xenbus.h>
/* hotplug down/up funtion pointer and target vcpu */
struct vcpu_hotplug_handler_t {
- void (*fn)(int vcpu);
+ void (*fn) (int vcpu);
u32 vcpu;
};
static struct vcpu_hotplug_handler_t vcpu_hotplug_handler;
+
+/*
+ * Kernel-thread body: run the pending hotplug function (if any) on the
+ * recorded vcpu, then clear the function pointer. Always returns 0.
+ */
+static int vcpu_hotplug_cpu_process(void *unused)
+{
+	struct vcpu_hotplug_handler_t *pending = &vcpu_hotplug_handler;
+
+	if (pending->fn == NULL)
+		return 0;
+
+	pending->fn(pending->vcpu);
+	pending->fn = NULL;
+	return 0;
+}
+
+/*
+ * Work-queue callback: start a kernel thread running
+ * vcpu_hotplug_cpu_process() and log an alert if it cannot be created.
+ */
+static void __vcpu_hotplug_handler(void *unused)
+{
+	int pid = kernel_thread(vcpu_hotplug_cpu_process,
+				NULL, CLONE_FS | CLONE_FILES);
+
+	if (pid < 0)
+		printk(KERN_ALERT "Error creating hotplug_cpu process!\n");
+}
+
+static void handle_vcpu_hotplug_event(struct xenbus_watch *, const char *);
+static struct notifier_block xsn_cpu;
+
+/* xenbus watch struct */
+static struct xenbus_watch cpu_watch = {
+ .node = "cpu",
+ .callback = handle_vcpu_hotplug_event
+};
+
+/*
+ * Notifier callback that registers the "cpu" xenbus watch.
+ * NB: Assumes xenbus_lock is held! The BUG_ON trips when down_trylock()
+ * returns 0 (presumably meaning the lock was free and has just been taken).
+ * A registration failure is only logged; the result is always NOTIFY_DONE.
+ */
+static int setup_cpu_watcher(struct notifier_block *notifier,
+			     unsigned long event, void *data)
+{
+	int rc;
+
+	BUG_ON(down_trylock(&xenbus_lock) == 0);
+
+	rc = register_xenbus_watch(&cpu_watch);
+	if (rc != 0)
+		printk("Failed to register watch on /cpu\n");
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * xenbus watch callback for the "cpu" node.
+ *
+ * Parses a path of the form ".../cpu/<N>/availability", reads the value of
+ * that key and, if it is "online"/"offline" and differs from the CPU's
+ * current state in cpu_online_map, schedules cpu_up()/cpu_down() for
+ * vcpu N through vcpu_hotplug_work.
+ *
+ * Paths that do not contain "cpu/<N>", carry an out-of-range N, or refer
+ * to a different key are ignored.
+ */
+static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
+				      const char *node)
+{
+	static DECLARE_WORK(vcpu_hotplug_work, __vcpu_hotplug_handler, NULL);
+	struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
+	int err, cpu;
+	char state[8];
+	char dir[32];
+	char *cpustr;
+
+	/* get a pointer to start of cpu string */
+	cpustr = strstr(node, "cpu/");
+	if (cpustr == NULL)
+		return;
+
+	/* find which cpu state changed; bail out if no id could be parsed */
+	if (sscanf(cpustr, "cpu/%d", &cpu) != 1)
+		return;
+
+	/* never index cpu_online_map with an id outside the cpu mask */
+	if (cpu < 0 || cpu >= NR_CPUS)
+		return;
+
+	/* calc the dir for xenbus read */
+	snprintf(dir, sizeof(dir), "cpu/%d", cpu);
+
+	/*
+	 * make sure the watch that was triggered is a change to the correct
+	 * key; compare relative to cpustr so a path prefix does not matter
+	 */
+	if (strcmp(cpustr + strlen(dir), "/availability") != 0)
+		return;
+
+	/* get the state value; the width keeps it within state[] */
+	xenbus_transaction_start("cpu");
+	err = xenbus_scanf(dir, "availability", "%7s", state);
+	xenbus_transaction_end(0);
+
+	if (err != 1) {
+		printk(KERN_ERR
+		       "XENBUS: Unable to read cpu state\n");
+		return;
+	}
+
+	/*
+	 * if we detect a state change, take action; the target vcpu is only
+	 * recorded when work is actually scheduled, so unrelated events do
+	 * not overwrite a pending request
+	 */
+	if (strcmp(state, "online") == 0) {
+		/* offline -> online */
+		if (!cpu_isset(cpu, cpu_online_map)) {
+			handler->vcpu = cpu;
+			handler->fn = (void *)&cpu_up;
+			schedule_work(&vcpu_hotplug_work);
+		}
+	} else if (strcmp(state, "offline") == 0) {
+		/* online -> offline */
+		if (cpu_isset(cpu, cpu_online_map)) {
+			handler->vcpu = cpu;
+			handler->fn = (void *)&cpu_down;
+			schedule_work(&vcpu_hotplug_work);
+		}
+	} else {
+		printk(KERN_ERR
+		       "XENBUS: unknown state(%s) on node(%s)\n", state,
+		       node);
+	}
+}
+
+/*
+ * Init-time hook: installs setup_cpu_watcher as the callback of the
+ * xsn_cpu notifier block and registers it with
+ * register_xenstore_notifier(). Always returns 0.
+ */
+static int __init setup_vcpu_hotplug_event(void)
+{
+ xsn_cpu.notifier_call = setup_cpu_watcher;
+
+ register_xenstore_notifier(&xsn_cpu);
+
+ return 0;
+}
+
+subsys_initcall(setup_vcpu_hotplug_event);
/* must be called with the cpucontrol mutex held */
static int __devinit cpu_enable(unsigned int cpu)
@@ -1398,77 +1507,6 @@
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
-static int vcpu_hotplug_cpu_process(void *unused)
-{
- struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
-
- if (handler->fn) {
- (*(handler->fn))(handler->vcpu);
- handler->fn = NULL;
- }
- return 0;
-}
-
-static void __vcpu_hotplug_handler(void *unused)
-{
- int err;
-
- err = kernel_thread(vcpu_hotplug_cpu_process,
- NULL, CLONE_FS | CLONE_FILES);
- if (err < 0)
- printk(KERN_ALERT "Error creating hotplug_cpu process!\n");
-
-}
-
-static void vcpu_hotplug_event_handler(ctrl_msg_t *msg, unsigned long id)
-{
- static DECLARE_WORK(vcpu_hotplug_work, __vcpu_hotplug_handler, NULL);
- vcpu_hotplug_t *req = (vcpu_hotplug_t *)&msg->msg[0];
- struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
- ssize_t ret;
-
- if (msg->length != sizeof(vcpu_hotplug_t))
- goto parse_error;
-
- /* grab target vcpu from msg */
- handler->vcpu = req->vcpu;
-
- /* determine which function to call based on msg subtype */
- switch (msg->subtype) {
- case CMSG_VCPU_HOTPLUG_OFF:
- handler->fn = (void *)&cpu_down;
- ret = schedule_work(&vcpu_hotplug_work);
- req->status = (u32) ret;
- break;
- case CMSG_VCPU_HOTPLUG_ON:
- handler->fn = (void *)&cpu_up;
- ret = schedule_work(&vcpu_hotplug_work);
- req->status = (u32) ret;
- break;
- default:
- goto parse_error;
- }
-
- ctrl_if_send_response(msg);
- return;
- parse_error:
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
- struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
-
- handler->fn = NULL;
- ctrl_if_register_receiver(CMSG_VCPU_HOTPLUG,
- vcpu_hotplug_event_handler, 0);
-
- return 0;
-}
-
-__initcall(setup_vcpu_hotplug_event);
-
#else /* ... !CONFIG_HOTPLUG_CPU */
int __cpu_disable(void)
{
@@ -1529,20 +1567,66 @@
extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
-void __init smp_intr_init(void)
+void smp_intr_init(void)
{
int cpu = smp_processor_id();
per_cpu(resched_irq, cpu) =
- bind_ipi_on_cpu_to_irq(RESCHEDULE_VECTOR);
+ bind_ipi_to_irq(RESCHEDULE_VECTOR);
sprintf(resched_name[cpu], "resched%d", cpu);
BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
SA_INTERRUPT, resched_name[cpu], NULL));
per_cpu(callfunc_irq, cpu) =
- bind_ipi_on_cpu_to_irq(CALL_FUNCTION_VECTOR);
+ bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
sprintf(callfunc_name[cpu], "callfunc%d", cpu);
BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
smp_call_function_interrupt,
SA_INTERRUPT, callfunc_name[cpu], NULL));
}
+
+/*
+ * Counterpart of smp_intr_init() for the calling CPU: frees the resched
+ * and callfunc IRQs and unbinds the corresponding IPI vectors.
+ */
+static void smp_intr_exit(void)
+{
+ int cpu = smp_processor_id();
+
+ /* Free each handler before unbinding its IPI. */
+ free_irq(per_cpu(resched_irq, cpu), NULL);
+ unbind_ipi_from_irq(RESCHEDULE_VECTOR);
+
+ free_irq(per_cpu(callfunc_irq, cpu), NULL);
+ unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
+}
+
+extern void local_setup_timer_irq(void);
+extern void local_teardown_timer_irq(void);
+
+/*
+ * Tear down the timer IRQ and then the IPI IRQs. smp_intr_exit() acts on
+ * the calling CPU only (see the XXX note below).
+ */
+void smp_suspend(void)
+{
+ /* XXX todo: take down time and ipi's on all cpus */
+ local_teardown_timer_irq();
+ smp_intr_exit();
+}
+
+/*
+ * Mirror of smp_suspend(): re-create the IPI IRQs first, then the timer
+ * IRQ. smp_intr_init() acts on the calling CPU only (see the XXX note).
+ */
+void smp_resume(void)
+{
+ /* XXX todo: restore time and ipi's on all cpus */
+ smp_intr_init();
+ local_setup_timer_irq();
+}
+
+DECLARE_PER_CPU(int, timer_irq);
+
+/*
+ * Rebind this CPU's timer and debug VIRQs and its resched/callfunc IPIs to
+ * the IRQ numbers stored in the per-cpu variables, then decrement
+ * vcpus_rebooting.
+ */
+void _restore_vcpu(void)
+{
+ int cpu = smp_processor_id();
+ extern atomic_t vcpus_rebooting;
+
+ /* We are the first thing the vcpu runs when it comes back,
+ and we are supposed to restore the IPIs and timer
+ interrupts etc. When we return, the vcpu's idle loop will
+ start up again. */
+ _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
+ _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
+ _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
+ _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
+ /* NOTE(review): presumably signals whoever waits on vcpus_rebooting that this vcpu is done - confirm against the caller. */
+ atomic_dec(&vcpus_rebooting);
+}
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Thu Aug 25 22:53:20 2005
@@ -70,6 +70,8 @@
#include "io_ports.h"
+#include <asm-xen/evtchn.h>
+
extern spinlock_t i8259A_lock;
int pit_latch_buggy; /* extern */
@@ -113,26 +115,15 @@
u32 version;
};
static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-static struct timeval shadow_tv;
+static struct timespec shadow_tv;
+static u32 shadow_tv_version;
/* Keep track of last time we did processing/updating of jiffies and xtime. */
static u64 processed_system_time; /* System time (ns) at last processing. */
static DEFINE_PER_CPU(u64, processed_system_time);
-#define NS_PER_TICK (1000000000ULL/HZ)
-
-#define HANDLE_USEC_UNDERFLOW(_tv) do { \
- while ((_tv).tv_usec < 0) { \
- (_tv).tv_usec += USEC_PER_SEC; \
- (_tv).tv_sec--; \
- } \
-} while (0)
-#define HANDLE_USEC_OVERFLOW(_tv) do { \
- while ((_tv).tv_usec >= USEC_PER_SEC) { \
- (_tv).tv_usec -= USEC_PER_SEC; \
- (_tv).tv_sec++; \
- } \
-} while (0)
+#define NS_PER_TICK (1000000000L/HZ)
+
static inline void __normalize_time(time_t *sec, s64 *nsec)
{
while (*nsec >= NSEC_PER_SEC) {
@@ -153,8 +144,6 @@
return 1;
}
__setup("independent_wallclock", __independent_wallclock);
-#define INDEPENDENT_WALLCLOCK() \
- (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
int tsc_disable __initdata = 0;
@@ -175,25 +164,40 @@
.delay = delay_tsc,
};
-static inline u32 down_shift(u64 time, int shift)
-{
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+ u64 product;
+#ifdef __i386__
+ u32 tmp1, tmp2;
+#endif
+
if ( shift < 0 )
- return (u32)(time >> -shift);
- return (u32)((u32)time << shift);
-}
-
-/*
- * 32-bit multiplication of integer multiplicand and fractional multiplier
- * yielding 32-bit integer product.
- */
-static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
-{
- u32 product_int, product_frac;
+ delta >>= -shift;
+ else
+ delta <<= shift;
+
+#ifdef __i386__
__asm__ (
- "mul %3"
- : "=a" (product_frac), "=d" (product_int)
- : "0" (multiplicand), "r" (multiplier) );
- return product_int;
+ "mul %5 ; "
+ "mov %4,%%eax ; "
+ "mov %%edx,%4 ; "
+ "mul %5 ; "
+ "add %4,%%eax ; "
+ "xor %5,%5 ; "
+ "adc %5,%%edx ; "
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#else
+ __asm__ (
+ "mul %%rdx ; shrd $32,%%rdx,%%rax"
+ : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#endif
+
+ return product;
}
void init_cpu_khz(void)
@@ -201,55 +205,43 @@
u64 __cpu_khz = 1000000ULL << 32;
struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
do_div(__cpu_khz, info->tsc_to_system_mul);
- cpu_khz = down_shift(__cpu_khz, -info->tsc_shift);
- printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
+ if ( info->tsc_shift < 0 )
+ cpu_khz = __cpu_khz << -info->tsc_shift;
+ else
+ cpu_khz = __cpu_khz >> info->tsc_shift;
}
static u64 get_nsec_offset(struct shadow_time_info *shadow)
{
- u64 now;
- u32 delta;
+ u64 now, delta;
rdtscll(now);
- delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
- return mul_frac(delta, shadow->tsc_to_nsec_mul);
+ delta = now - shadow->tsc_timestamp;
+ return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}
static unsigned long get_usec_offset(struct shadow_time_info *shadow)
{
- u64 now;
- u32 delta;
+ u64 now, delta;
rdtscll(now);
- delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
- return mul_frac(delta, shadow->tsc_to_usec_mul);
-}
-
-static void update_wallclock(void)
-{
- shared_info_t *s = HYPERVISOR_shared_info;
+ delta = now - shadow->tsc_timestamp;
+ return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
+}
+
+static void __update_wallclock(time_t sec, long nsec)
+{
long wtm_nsec, xtime_nsec;
time_t wtm_sec, xtime_sec;
- u64 tmp, usec;
-
- shadow_tv.tv_sec = s->wc_sec;
- shadow_tv.tv_usec = s->wc_usec;
-
- if (INDEPENDENT_WALLCLOCK())
- return;
-
- if ((time_status & STA_UNSYNC) != 0)
- return;
+ u64 tmp, wc_nsec;
/* Adjust wall-clock time base based on wall_jiffies ticks. */
- usec = processed_system_time;
- do_div(usec, 1000);
- usec += (u64)shadow_tv.tv_sec * 1000000ULL;
- usec += (u64)shadow_tv.tv_usec;
- usec -= (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
+ wc_nsec = processed_system_time;
+ wc_nsec += (u64)sec * 1000000000ULL;
+ wc_nsec += (u64)nsec;
+ wc_nsec -= (jiffies - wall_jiffies) * (u64)(NSEC_PER_SEC / HZ);
/* Split wallclock base into seconds and nanoseconds. */
- tmp = usec;
- xtime_nsec = do_div(tmp, 1000000) * 1000ULL;
+ tmp = wc_nsec;
+ xtime_nsec = do_div(tmp, 1000000000);
xtime_sec = (time_t)tmp;
wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
@@ -257,13 +249,35 @@
set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+ time_adjust = 0; /* stop active adjtime() */
+ time_status |= STA_UNSYNC;
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_esterror = NTP_PHASE_LIMIT;
+}
+
+/*
+ * Take a consistent snapshot of the wallclock fields in the shared info
+ * page into shadow_tv / shadow_tv_version and, unless
+ * independent_wallclock is set, apply it through __update_wallclock().
+ */
+static void update_wallclock(void)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+
+ /*
+ * Read the version, then the payload, with read barriers in between.
+ * Retry while the version is odd or has changed since it was sampled.
+ */
+ do {
+ shadow_tv_version = s->wc_version;
+ rmb();
+ shadow_tv.tv_sec = s->wc_sec;
+ shadow_tv.tv_nsec = s->wc_nsec;
+ rmb();
+ }
+ while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));
+
+ if (!independent_wallclock)
+ __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
+}
/*
* Reads a consistent set of time-base values from Xen, into a shadow data
- * area. Must be called with the xtime_lock held for writing.
+ * area.
*/
-static void __get_time_values_from_xen(void)
+static void get_time_values_from_xen(void)
{
shared_info_t *s = HYPERVISOR_shared_info;
struct vcpu_time_info *src;
@@ -273,7 +287,7 @@
dst = &per_cpu(shadow_time, smp_processor_id());
do {
- dst->version = src->time_version2;
+ dst->version = src->version;
rmb();
dst->tsc_timestamp = src->tsc_timestamp;
dst->system_timestamp = src->system_time;
@@ -281,13 +295,9 @@
dst->tsc_shift = src->tsc_shift;
rmb();
}
- while (dst->version != src->time_version1);
+ while ((src->version & 1) | (dst->version ^ src->version));
dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
-
- if ((shadow_tv.tv_sec != s->wc_sec) ||
- (shadow_tv.tv_usec != s->wc_usec))
- update_wallclock();
}
static inline int time_values_up_to_date(int cpu)
@@ -298,7 +308,7 @@
src = &HYPERVISOR_shared_info->vcpu_time[cpu];
dst = &per_cpu(shadow_time, cpu);
- return (dst->version == src->time_version2);
+ return (dst->version == src->version);
}
/*
@@ -339,10 +349,10 @@
unsigned long seq;
unsigned long usec, sec;
unsigned long max_ntp_tick;
- unsigned long flags;
s64 nsec;
unsigned int cpu;
struct shadow_time_info *shadow;
+ u32 local_time_version;
cpu = get_cpu();
shadow = &per_cpu(shadow_time, cpu);
@@ -350,6 +360,7 @@
do {
unsigned long lost;
+ local_time_version = shadow->version;
seq = read_seqbegin(&xtime_lock);
usec = get_usec_offset(shadow);
@@ -385,12 +396,11 @@
* overflowed). Detect that and recalculate
* with fresh values.
*/
- write_seqlock_irqsave(&xtime_lock, flags);
- __get_time_values_from_xen();
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ get_time_values_from_xen();
continue;
}
- } while (read_seqretry(&xtime_lock, seq));
+ } while (read_seqretry(&xtime_lock, seq) ||
+ (local_time_version != shadow->version));
put_cpu();
@@ -407,18 +417,14 @@
int do_settimeofday(struct timespec *tv)
{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec;
+ time_t sec;
s64 nsec;
- struct timespec xentime;
unsigned int cpu;
struct shadow_time_info *shadow;
+ dom0_op_t op;
if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
-
- if (!INDEPENDENT_WALLCLOCK())
- return 0; /* Silent failure? */
cpu = get_cpu();
shadow = &per_cpu(shadow_time, cpu);
@@ -430,50 +436,30 @@
* overflows. If that were to happen then our shadow time values would
* be stale, so we can retry with fresh ones.
*/
- again:
- nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
- if (unlikely(!time_values_up_to_date(cpu))) {
- __get_time_values_from_xen();
- goto again;
- }
-
+ for ( ; ; ) {
+ nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
+ if (time_values_up_to_date(cpu))
+ break;
+ get_time_values_from_xen();
+ }
+ sec = tv->tv_sec;
__normalize_time(&sec, &nsec);
- set_normalized_timespec(&xentime, sec, nsec);
-
- /*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * made, and then undo it!
- */
- nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
- nsec -= (shadow->system_timestamp - processed_system_time);
-
- __normalize_time(&sec, &nsec);
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- time_adjust = 0; /* stop active adjtime() */
- time_status |= STA_UNSYNC;
- time_maxerror = NTP_PHASE_LIMIT;
- time_esterror = NTP_PHASE_LIMIT;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- if (xen_start_info.flags & SIF_INITDOMAIN) {
- dom0_op_t op;
+
+ if ((xen_start_info.flags & SIF_INITDOMAIN) &&
+ !independent_wallclock) {
op.cmd = DOM0_SETTIME;
- op.u.settime.secs = xentime.tv_sec;
- op.u.settime.usecs = xentime.tv_nsec / NSEC_PER_USEC;
+ op.u.settime.secs = sec;
+ op.u.settime.nsecs = nsec;
op.u.settime.system_time = shadow->system_timestamp;
- write_sequnlock_irq(&xtime_lock);
HYPERVISOR_dom0_op(&op);
- } else
-#endif
- write_sequnlock_irq(&xtime_lock);
+ update_wallclock();
+ } else if (independent_wallclock) {
+ nsec -= shadow->system_timestamp;
+ __normalize_time(&sec, &nsec);
+ __update_wallclock(sec, nsec);
+ }
+
+ write_sequnlock_irq(&xtime_lock);
put_cpu();
@@ -489,6 +475,9 @@
int retval;
WARN_ON(irqs_disabled());
+
+ if (!(xen_start_info.flags & SIF_INITDOMAIN))
+ return 0;
/* gets recalled with irq locally disabled */
spin_lock_irq(&rtc_lock);
@@ -515,21 +504,21 @@
{
int cpu = get_cpu();
struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
- s64 off;
- unsigned long flags;
-
- for ( ; ; ) {
- off = get_nsec_offset(shadow);
- if (time_values_up_to_date(cpu))
- break;
- write_seqlock_irqsave(&xtime_lock, flags);
- __get_time_values_from_xen();
- write_sequnlock_irqrestore(&xtime_lock, flags);
- }
+ u64 time;
+ u32 local_time_version;
+
+ do {
+ local_time_version = shadow->version;
+ smp_rmb();
+ time = shadow->system_timestamp + get_nsec_offset(shadow);
+ if (!time_values_up_to_date(cpu))
+ get_time_values_from_xen();
+ smp_rmb();
+ } while (local_time_version != shadow->version);
put_cpu();
- return shadow->system_timestamp + off;
+ return time;
}
EXPORT_SYMBOL(monotonic_clock);
@@ -551,19 +540,16 @@
EXPORT_SYMBOL(profile_pc);
#endif
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static inline void do_timer_interrupt(int irq, void *dev_id,
- struct pt_regs *regs)
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
s64 delta, delta_cpu;
int cpu = smp_processor_id();
struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+ write_seqlock(&xtime_lock);
+
do {
- __get_time_values_from_xen();
+ get_time_values_from_xen();
delta = delta_cpu =
shadow->system_timestamp + get_nsec_offset(shadow);
@@ -572,7 +558,7 @@
}
while (!time_values_up_to_date(cpu));
- if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
+ if (unlikely(delta < (s64)-1000000) || unlikely(delta_cpu < 0)) {
printk("Timer ISR/%d: Time went backwards: "
"delta=%lld cpu_delta=%lld shadow=%lld "
"off=%lld processed=%lld cpu_processed=%lld\n",
@@ -583,7 +569,6 @@
for (cpu = 0; cpu < num_online_cpus(); cpu++)
printk(" %d: %lld\n", cpu,
per_cpu(processed_system_time, cpu));
- return;
}
/* System-wide jiffy work. */
@@ -593,32 +578,25 @@
do_timer(regs);
}
- /* Local CPU jiffy work. */
+ if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
+ update_wallclock();
+ clock_was_set();
+ }
+
+ write_sequnlock(&xtime_lock);
+
+ /*
+ * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure
+ * if there is risk of deadlock if we do (since update_process_times
+ * may do scheduler rebalancing work and thus acquire runqueue locks).
+ */
while (delta_cpu >= NS_PER_TICK) {
delta_cpu -= NS_PER_TICK;
per_cpu(processed_system_time, cpu) += NS_PER_TICK;
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING, regs);
}
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
- do_timer_interrupt(irq, NULL, regs);
- write_sequnlock(&xtime_lock);
+
return IRQ_HANDLED;
}
@@ -767,7 +745,7 @@
#endif
/* Dynamically-mapped IRQ. */
-static DEFINE_PER_CPU(int, timer_irq);
+DEFINE_PER_CPU(int, timer_irq);
static struct irqaction irq_timer = {
timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
@@ -786,15 +764,16 @@
return;
}
#endif
- __get_time_values_from_xen();
- xtime.tv_sec = shadow_tv.tv_sec;
- xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
+ get_time_values_from_xen();
+
processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
per_cpu(processed_system_time, 0) = processed_system_time;
+ update_wallclock();
+
init_cpu_khz();
+ printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
#if defined(__x86_64__)
vxtime.mode = VXTIME_TSC;
@@ -860,6 +839,8 @@
void time_suspend(void)
{
/* nothing */
+ teardown_irq(per_cpu(timer_irq, 0), &irq_timer);
+ unbind_virq_from_irq(VIRQ_TIMER);
}
/* No locking required. We are only CPU running, and interrupts are off. */
@@ -867,17 +848,31 @@
{
init_cpu_khz();
- /* Get timebases for new environment. */
- __get_time_values_from_xen();
-
- /* Reset our own concept of passage of system time. */
- processed_system_time =
- per_cpu(shadow_time, smp_processor_id()).system_timestamp;
+ get_time_values_from_xen();
+
+ processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
per_cpu(processed_system_time, 0) = processed_system_time;
+
+ update_wallclock();
+
+ per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER);
+ (void)setup_irq(per_cpu(timer_irq, 0), &irq_timer);
}
#ifdef CONFIG_SMP
static char timer_name[NR_CPUS][15];
+void local_setup_timer_irq(void)
+{
+ int cpu = smp_processor_id();
+
+ if (cpu == 0)
+ return;
+ per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
+ sprintf(timer_name[cpu], "timer%d", cpu);
+ BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt,
+ SA_INTERRUPT, timer_name[cpu], NULL));
+}
+
void local_setup_timer(void)
{
int seq, cpu = smp_processor_id();
@@ -888,10 +883,17 @@
per_cpu(shadow_time, cpu).system_timestamp;
} while (read_seqretry(&xtime_lock, seq));
- per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
- sprintf(timer_name[cpu], "timer%d", cpu);
- BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt,
- SA_INTERRUPT, timer_name[cpu], NULL));
+ local_setup_timer_irq();
+}
+
+void local_teardown_timer_irq(void)
+{
+ int cpu = smp_processor_id();
+
+ if (cpu == 0)
+ return;
+ free_irq(per_cpu(timer_irq, cpu), NULL);
+ unbind_virq_from_irq(VIRQ_TIMER);
}
#endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Aug 25 22:53:20 2005
@@ -871,6 +871,7 @@
}
}
+#ifndef CONFIG_XEN
fastcall void setup_x86_bogus_stack(unsigned char * stk)
{
unsigned long *switch16_ptr, *switch32_ptr;
@@ -915,6 +916,7 @@
memcpy(stack32, stack16, len);
return stack32;
}
+#endif
/*
* 'math_state_restore()' saves the current math information in the
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Thu Aug 25 22:53:20 2005
@@ -281,7 +281,7 @@
siginfo_t info;
/* Set the "privileged fault" bit to something sane. */
- error_code &= 3;
+ error_code &= ~4;
error_code |= (regs->xcs & 2) << 1;
if (regs->eflags & X86_EFLAGS_VM)
error_code |= 4;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c Thu Aug 25 22:53:20 2005
@@ -41,8 +41,7 @@
if (!pte_none(*(kmap_pte-idx)))
BUG();
#endif
- set_pte(kmap_pte-idx, mk_pte(page, prot));
- __flush_tlb_one(vaddr);
+ set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
return (void*) vaddr;
}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Thu Aug 25 22:53:20 2005
@@ -35,6 +35,7 @@
#include <asm/pgtable.h>
#include <asm-xen/hypervisor.h>
#include <asm-xen/balloon.h>
+#include <linux/module.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
#include <linux/percpu.h>
#include <asm/tlbflush.h>
@@ -58,124 +59,124 @@
#ifndef CONFIG_XEN_SHADOW_MODE
void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
- mmu_update_t u;
- u.ptr = virt_to_machine(ptr);
- u.val = pte_val_ma(val);
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = pte_val_ma(val);
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
- mmu_update_t u;
- u.ptr = virt_to_machine(ptr);
- u.val = pmd_val_ma(val);
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = pmd_val_ma(val);
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#ifdef CONFIG_X86_PAE
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
- mmu_update_t u;
- u.ptr = virt_to_machine(ptr);
- u.val = pud_val_ma(val);
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = pud_val_ma(val);
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif
#ifdef CONFIG_X86_64
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
- mmu_update_t u;
- u.ptr = virt_to_machine(ptr);
- u.val = val.pud;
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = val.pud;
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
- mmu_update_t u;
- u.ptr = virt_to_machine(ptr);
- u.val = val.pgd;
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = val.pgd;
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */
#endif /* CONFIG_XEN_SHADOW_MODE */
void xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
- mmu_update_t u;
- u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- u.val = pfn;
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+ mmu_update_t u;
+ u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ u.val = pfn;
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
void xen_pt_switch(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_NEW_BASEPTR;
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_NEW_BASEPTR;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_new_user_pt(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_NEW_USER_BASEPTR;
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_NEW_USER_BASEPTR;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_tlb_flush(void)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_invlpg(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_INVLPG_LOCAL;
- op.linear_addr = ptr & PAGE_MASK;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_INVLPG_LOCAL;
+ op.linear_addr = ptr & PAGE_MASK;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
#ifdef CONFIG_SMP
void xen_tlb_flush_all(void)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_TLB_FLUSH_ALL;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_TLB_FLUSH_ALL;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_tlb_flush_mask(cpumask_t *mask)
{
- struct mmuext_op op;
- if ( cpus_empty(*mask) )
- return;
- op.cmd = MMUEXT_TLB_FLUSH_MULTI;
- op.vcpumask = mask->bits;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ if ( cpus_empty(*mask) )
+ return;
+ op.cmd = MMUEXT_TLB_FLUSH_MULTI;
+ op.vcpumask = mask->bits;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_invlpg_all(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_INVLPG_ALL;
- op.linear_addr = ptr & PAGE_MASK;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_INVLPG_ALL;
+ op.linear_addr = ptr & PAGE_MASK;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
- struct mmuext_op op;
- if ( cpus_empty(*mask) )
- return;
- op.cmd = MMUEXT_INVLPG_MULTI;
- op.vcpumask = mask->bits;
- op.linear_addr = ptr & PAGE_MASK;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ if ( cpus_empty(*mask) )
+ return;
+ op.cmd = MMUEXT_INVLPG_MULTI;
+ op.vcpumask = mask->bits;
+ op.linear_addr = ptr & PAGE_MASK;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_SMP */
@@ -183,181 +184,233 @@
#ifndef CONFIG_XEN_SHADOW_MODE
void xen_pgd_pin(unsigned long ptr)
{
- struct mmuext_op op;
+ struct mmuext_op op;
#ifdef CONFIG_X86_64
- op.cmd = MMUEXT_PIN_L4_TABLE;
+ op.cmd = MMUEXT_PIN_L4_TABLE;
#elif defined(CONFIG_X86_PAE)
- op.cmd = MMUEXT_PIN_L3_TABLE;
+ op.cmd = MMUEXT_PIN_L3_TABLE;
#else
- op.cmd = MMUEXT_PIN_L2_TABLE;
+ op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_pgd_unpin(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_pte_pin(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L1_TABLE;
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PIN_L1_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_pte_unpin(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
#ifdef CONFIG_X86_64
void xen_pud_pin(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L3_TABLE;
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PIN_L3_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_pud_unpin(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_pmd_pin(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L2_TABLE;
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PIN_L2_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_pmd_unpin(unsigned long ptr)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */
#endif /* CONFIG_XEN_SHADOW_MODE */
void xen_set_ldt(unsigned long ptr, unsigned long len)
{
- struct mmuext_op op;
- op.cmd = MMUEXT_SET_LDT;
- op.linear_addr = ptr;
- op.nr_ents = len;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_contig_memory(unsigned long vstart, unsigned int order)
-{
- /*
- * Ensure multi-page extents are contiguous in machine memory. This code
- * could be cleaned up some, and the number of hypercalls reduced.
- */
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long mfn, i, flags;
-
- scrub_pages(vstart, 1 << order);
-
- balloon_lock(flags);
-
- /* 1. Zap current PTEs, giving away the underlying pages. */
- for (i = 0; i < (1<<order); i++) {
- pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
- pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
- pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
- pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
- mfn = pte_mfn(*pte);
- HYPERVISOR_update_va_mapping(
- vstart + (i*PAGE_SIZE), __pte_ma(0), 0);
- phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
- INVALID_P2M_ENTRY;
- BUG_ON(HYPERVISOR_dom_mem_op(
- MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
- }
-
- /* 2. Get a new contiguous memory extent. */
- BUG_ON(HYPERVISOR_dom_mem_op(
- MEMOP_increase_reservation, &mfn, 1, order | (32<<8)) != 1);
-
- /* 3. Map the new extent in place of old pages. */
- for (i = 0; i < (1<<order); i++) {
- HYPERVISOR_update_va_mapping(
- vstart + (i*PAGE_SIZE),
- __pte_ma(((mfn+i)<<PAGE_SHIFT)|__PAGE_KERNEL), 0);
- xen_machphys_update(mfn+i, (__pa(vstart)>>PAGE_SHIFT)+i);
- phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn+i;
- }
-
- flush_tlb_all();
-
- balloon_unlock(flags);
-}
-
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
-
-unsigned long allocate_empty_lowmem_region(unsigned long pages)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long *pfn_array;
- unsigned long vstart;
- unsigned long i;
- unsigned int order = get_order(pages*PAGE_SIZE);
-
- vstart = __get_free_pages(GFP_KERNEL, order);
- if ( vstart == 0 )
- return 0UL;
-
- scrub_pages(vstart, 1 << order);
-
- pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
- if ( pfn_array == NULL )
- BUG();
-
- for ( i = 0; i < (1<<order); i++ )
- {
- pgd = pgd_offset_k( (vstart + (i*PAGE_SIZE)));
- pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
- pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
- pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
- pfn_array[i] = pte_mfn(*pte);
-#ifdef CONFIG_X86_64
- xen_l1_entry_update(pte, __pte(0));
-#else
- HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), __pte_ma(0), 0);
-#endif
- phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
- INVALID_P2M_ENTRY;
- }
-
- flush_tlb_all();
-
- balloon_put_pages(pfn_array, 1 << order);
-
- vfree(pfn_array);
-
- return vstart;
-}
-
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+ struct mmuext_op op;
+ op.cmd = MMUEXT_SET_LDT;
+ op.linear_addr = ptr;
+ op.nr_ents = len;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+/*
+ * Bitmap is indexed by page number. If bit is set, the page is part of a
+ * xen_create_contiguous_region() area of memory.
+ */
+unsigned long *contiguous_bitmap;
+
+static void contiguous_bitmap_set(
+ unsigned long first_page, unsigned long nr_pages)
+{
+ unsigned long start_off, end_off, curr_idx, end_idx;
+
+ curr_idx = first_page / BITS_PER_LONG;
+ start_off = first_page & (BITS_PER_LONG-1);
+ end_idx = (first_page + nr_pages) / BITS_PER_LONG;
+ end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
+
+ if (curr_idx == end_idx) {
+ contiguous_bitmap[curr_idx] |=
+ ((1UL<<end_off)-1) & -(1UL<<start_off);
+ } else {
+ contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
+ while ( ++curr_idx < end_idx )
+ contiguous_bitmap[curr_idx] = ~0UL;
+ contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
+ }
+}
+
+static void contiguous_bitmap_clear(
+ unsigned long first_page, unsigned long nr_pages)
+{
+ unsigned long start_off, end_off, curr_idx, end_idx;
+
+ curr_idx = first_page / BITS_PER_LONG;
+ start_off = first_page & (BITS_PER_LONG-1);
+ end_idx = (first_page + nr_pages) / BITS_PER_LONG;
+ end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
+
+ if (curr_idx == end_idx) {
+ contiguous_bitmap[curr_idx] &=
+ -(1UL<<end_off) | ((1UL<<start_off)-1);
+ } else {
+ contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
+ while ( ++curr_idx != end_idx )
+ contiguous_bitmap[curr_idx] = 0;
+ contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
+ }
+}
+
+/* Ensure multi-page extents are contiguous in machine memory. */
+void xen_create_contiguous_region(unsigned long vstart, unsigned int order)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long mfn, i, flags;
+
+ scrub_pages(vstart, 1 << order);
+
+ balloon_lock(flags);
+
+ /* 1. Zap current PTEs, giving away the underlying pages. */
+ for (i = 0; i < (1<<order); i++) {
+ pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
+ pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
+ pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
+ pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
+ mfn = pte_mfn(*pte);
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
+ phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
+ INVALID_P2M_ENTRY;
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+ }
+
+ /* 2. Get a new contiguous memory extent. */
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_increase_reservation, &mfn, 1, order | (32<<8)) != 1);
+
+ /* 3. Map the new extent in place of old pages. */
+ for (i = 0; i < (1<<order); i++) {
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ vstart + (i*PAGE_SIZE),
+ pfn_pte_ma(mfn+i, PAGE_KERNEL), 0));
+ xen_machphys_update(mfn+i, (__pa(vstart)>>PAGE_SHIFT)+i);
+ phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn+i;
+ }
+
+ flush_tlb_all();
+
+ contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
+
+ balloon_unlock(flags);
+}
+
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long mfn, i, flags;
+
+ scrub_pages(vstart, 1 << order);
+
+ balloon_lock(flags);
+
+ contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
+
+ /* 1. Zap current PTEs, giving away the underlying pages. */
+ for (i = 0; i < (1<<order); i++) {
+ pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
+ pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
+ pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
+ pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
+ mfn = pte_mfn(*pte);
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
+ phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
+ INVALID_P2M_ENTRY;
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+ }
+
+ /* 2. Map new pages in place of old pages. */
+ for (i = 0; i < (1<<order); i++) {
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_increase_reservation, &mfn, 1, 0) != 1);
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ vstart + (i*PAGE_SIZE),
+ pfn_pte_ma(mfn, PAGE_KERNEL), 0));
+ xen_machphys_update(mfn, (__pa(vstart)>>PAGE_SHIFT)+i);
+ phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn;
+ }
+
+ flush_tlb_all();
+
+ balloon_unlock(flags);
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Thu Aug 25 22:53:20 2005
@@ -41,6 +41,14 @@
#include <asm/sections.h>
#include <asm-xen/hypervisor.h>
+extern unsigned long *contiguous_bitmap;
+
+#if defined(CONFIG_SWIOTLB)
+extern void swiotlb_init(void);
+int swiotlb;
+EXPORT_SYMBOL(swiotlb);
+#endif
+
unsigned int __VMALLOC_RESERVE = 128 << 20;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -334,18 +342,18 @@
extern void __init remap_numa_kva(void);
#endif
+pgd_t *swapper_pg_dir;
+
static void __init pagetable_init (void)
{
unsigned long vaddr;
- pgd_t *pgd_base = swapper_pg_dir;
- pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
-
-#ifdef CONFIG_X86_PAE
+ pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base;
int i;
- /* Init entries of the first-level page table to the zero page */
- for (i = 0; i < PTRS_PER_PGD; i++)
- set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-#endif
+
+ swapper_pg_dir = pgd_base;
+ init_mm.pgd = pgd_base;
+ for (i = 0; i < NR_CPUS; i++)
+ per_cpu(cur_pgd, i) = pgd_base;
/* Enable PSE if available */
if (cpu_has_pse) {
@@ -358,44 +366,6 @@
__PAGE_KERNEL |= _PAGE_GLOBAL;
__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
}
-
- /*
- * Switch to proper mm_init page directory. Initialise from the current
- * page directory, write-protect the new page directory, then switch to
- * it. We clean up by write-enabling and then freeing the old page dir.
- */
-#ifndef CONFIG_X86_PAE
- memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
- make_page_readonly(pgd_base);
- xen_pgd_pin(__pa(pgd_base));
- load_cr3(pgd_base);
- xen_pgd_unpin(__pa(old_pgd));
- make_page_writable(old_pgd);
- __flush_tlb_all();
- free_bootmem(__pa(old_pgd), PAGE_SIZE);
-#else
- {
- pud_t *old_pud = pud_offset(old_pgd+3, PAGE_OFFSET);
- pmd_t *old_pmd = pmd_offset(old_pud, PAGE_OFFSET);
- pmd_t *new_pmd = alloc_bootmem_low_pages(PAGE_SIZE);
-
- memcpy(new_pmd, old_pmd, PAGE_SIZE);
- memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
- set_pgd(&pgd_base[3], __pgd(__pa(new_pmd) | _PAGE_PRESENT));
-
- make_page_readonly(new_pmd);
- make_page_readonly(pgd_base);
- xen_pgd_pin(__pa(pgd_base));
- load_cr3(pgd_base);
- xen_pgd_unpin(__pa(old_pgd));
- make_page_writable(old_pgd);
- make_page_writable(old_pmd);
- __flush_tlb_all();
-
- free_bootmem(__pa(old_pgd), PAGE_SIZE);
- free_bootmem(__pa(old_pmd), PAGE_SIZE);
- }
-#endif
init_mm.context.pinned = 1;
kernel_physical_mapping_init(pgd_base);
@@ -409,17 +379,6 @@
page_table_range_init(vaddr, 0, pgd_base);
permanent_kmaps_init(pgd_base);
-
-#if 0 /* def CONFIG_X86_PAE */
- /*
- * Add low memory identity-mappings - SMP needs it when
- * starting up on an AP from real-mode. In the non-PAE
- * case we already have these mappings through head.S.
- * All user-space mappings are explicitly cleared after
- * SMP startup.
- */
- set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
-#endif
}
#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
@@ -630,6 +589,15 @@
int tmp;
int bad_ppro;
unsigned long pfn;
+
+ contiguous_bitmap = alloc_bootmem_low_pages(
+ (max_low_pfn + 2*BITS_PER_LONG) >> 3);
+ BUG_ON(!contiguous_bitmap);
+ memset(contiguous_bitmap, 0, (max_low_pfn + 2*BITS_PER_LONG) >> 3);
+
+#if defined(CONFIG_SWIOTLB)
+ swiotlb_init();
+#endif
#ifndef CONFIG_DISCONTIGMEM
if (!mem_map)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Thu Aug 25 22:53:20 2005
@@ -36,6 +36,8 @@
{
}
+#ifdef __i386__
+
void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
{
return NULL;
@@ -44,6 +46,8 @@
void __init bt_iounmap(void *addr, unsigned long size)
{
}
+
+#endif /* __i386__ */
#else
@@ -58,7 +62,7 @@
extern unsigned long max_low_pfn;
unsigned long mfn = address >> PAGE_SHIFT;
unsigned long pfn = mfn_to_pfn(mfn);
- return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
+ return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn));
}
/*
@@ -126,10 +130,12 @@
return NULL;
area->phys_addr = phys_addr;
addr = (void __iomem *) area->addr;
+ flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+#ifdef __x86_64__
+ flags |= _PAGE_USER;
+#endif
if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
- size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
- _PAGE_DIRTY | _PAGE_ACCESSED
- | flags), domid)) {
+ size, __pgprot(flags), domid)) {
vunmap((void __force *) addr);
return NULL;
}
@@ -218,6 +224,8 @@
kfree(p);
}
+#ifdef __i386__
+
void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
{
unsigned long offset, last_addr;
@@ -289,6 +297,8 @@
}
}
+#endif /* __i386__ */
+
#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
/* These hacky macros avoid phys->machine translations. */
@@ -298,90 +308,20 @@
#define direct_mk_pte_phys(physpage, pgprot) \
__direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-static inline void direct_remap_area_pte(pte_t *pte,
- unsigned long address,
- unsigned long size,
- mmu_update_t **v)
-{
- unsigned long end;
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- if (address >= end)
- BUG();
-
- do {
- (*v)->ptr = virt_to_machine(pte);
- (*v)++;
- address += PAGE_SIZE;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int direct_remap_area_pmd(struct mm_struct *mm,
- pmd_t *pmd,
- unsigned long address,
- unsigned long size,
- mmu_update_t **v)
-{
- unsigned long end;
-
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- if (address >= end)
- BUG();
- do {
- pte_t *pte = (mm == &init_mm) ?
- pte_alloc_kernel(mm, pmd, address) :
- pte_alloc_map(mm, pmd, address);
- if (!pte)
- return -ENOMEM;
- direct_remap_area_pte(pte, address, end - address, v);
- pte_unmap(pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+
+static int direct_remap_area_pte_fn(pte_t *pte,
+ struct page *pte_page,
+ unsigned long address,
+ void *data)
+{
+ mmu_update_t **v = (mmu_update_t **)data;
+
+ (*v)->ptr = ((maddr_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
+ PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
+ (*v)++;
+
return 0;
}
-
-int __direct_remap_area_pages(struct mm_struct *mm,
- unsigned long address,
- unsigned long size,
- mmu_update_t *v)
-{
- pgd_t * dir;
- unsigned long end = address + size;
- int error;
-
- dir = pgd_offset(mm, address);
- if (address >= end)
- BUG();
- spin_lock(&mm->page_table_lock);
- do {
- pud_t *pud;
- pmd_t *pmd;
-
- error = -ENOMEM;
- pud = pud_alloc(mm, dir, address);
- if (!pud)
- break;
- pmd = pmd_alloc(mm, pud, address);
- if (!pmd)
- break;
- error = 0;
- direct_remap_area_pmd(mm, pmd, address, end - address, &v);
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
-
- } while (address && (address < end));
- spin_unlock(&mm->page_table_lock);
- return error;
-}
-
int direct_remap_area_pages(struct mm_struct *mm,
unsigned long address,
@@ -393,7 +333,7 @@
int i;
unsigned long start_address;
#define MAX_DIRECTMAP_MMU_QUEUE 130
- mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
+ mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u, *w = u;
start_address = address;
@@ -402,11 +342,10 @@
for (i = 0; i < size; i += PAGE_SIZE) {
if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
/* Fill in the PTE pointers. */
- __direct_remap_area_pages(mm,
- start_address,
- address-start_address,
- u);
-
+ generic_page_range(mm, start_address,
+ address - start_address,
+ direct_remap_area_pte_fn, &w);
+ w = u;
if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
return -EFAULT;
v = u;
@@ -417,7 +356,7 @@
* Fill in the machine address: PTE ptr is done later by
* __direct_remap_area_pages().
*/
- v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
+ v->val = pte_val_ma(pfn_pte_ma(machine_addr >> PAGE_SHIFT,
prot));
machine_addr += PAGE_SIZE;
address += PAGE_SIZE;
@@ -426,10 +365,8 @@
if (v != u) {
/* get the ptep's filled in */
- __direct_remap_area_pages(mm,
- start_address,
- address-start_address,
- u);
+ generic_page_range(mm, start_address, address - start_address,
+ direct_remap_area_pte_fn, &w);
if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
return -EFAULT;
}
@@ -440,3 +377,48 @@
}
EXPORT_SYMBOL(direct_remap_area_pages);
+
+/*
+ * generic_page_range() callback used by create_lookup_pte_addr().
+ *
+ * When data is non-NULL it is treated as an unsigned long * and receives
+ * the machine address of the PTE: the machine frame of pte_page shifted
+ * by PAGE_SHIFT, OR'ed with the offset of pte inside its page.  addr is
+ * unused.  Always returns 0.
+ */
+static int lookup_pte_fn(
+	pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+	unsigned long *out = data;
+	unsigned long frame_base, offset_in_page;
+
+	/* Caller did not ask for the address: nothing to report. */
+	if (out == NULL)
+		return 0;
+
+	frame_base = pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT;
+	offset_in_page = (unsigned long)pte & ~PAGE_MASK;
+	*out = frame_base | offset_in_page;
+	return 0;
+}
+
+/*
+ * Run generic_page_range() over the single page at address in mm with
+ * lookup_pte_fn as the callback, so that *ptep (if ptep is non-NULL)
+ * receives the machine address of the PTE for that page.
+ *
+ * Returns whatever generic_page_range() returns.
+ * NOTE(review): the name suggests missing page tables are created by the
+ * walk; that depends on generic_page_range(), which is not visible here.
+ */
+int create_lookup_pte_addr(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long *ptep)
+{
+ return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep);
+}
+
+EXPORT_SYMBOL(create_lookup_pte_addr);
+
+/*
+ * generic_page_range() callback that ignores all of its arguments and
+ * reports success; used by touch_pte_range().
+ */
+static int noop_fn(
+ pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+ return 0;
+}
+
+/*
+ * Walk [address, address + size) in mm with generic_page_range() using a
+ * callback that does nothing, i.e. only the side effects of the walk
+ * itself remain.  Returns whatever generic_page_range() returns.
+ * NOTE(review): presumably used to make sure the page tables covering the
+ * range exist -- confirm against generic_page_range().
+ */
+int touch_pte_range(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long size)
+{
+ return generic_page_range(mm, address, size, noop_fn, NULL);
+}
+
+EXPORT_SYMBOL(touch_pte_range);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Thu Aug 25 22:53:20 2005
@@ -25,6 +25,7 @@
#include <asm/mmu_context.h>
#include <asm-xen/foreign_page.h>
+#include <asm-xen/hypervisor.h>
void show_mem(void)
{
@@ -169,7 +170,7 @@
__flush_tlb_one(vaddr);
}
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t
flags)
+void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
{
unsigned long address = __fix_to_virt(idx);
@@ -221,8 +222,8 @@
unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
if (!pte_write(*virt_to_ptep(va)))
- HYPERVISOR_update_va_mapping(
- va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0);
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
ClearPageForeign(pte);
set_page_count(pte, 1);
@@ -274,6 +275,11 @@
{
unsigned long flags;
+#ifdef CONFIG_X86_PAE
+ /* this gives us a page below 4GB */
+ xen_create_contiguous_region((unsigned long)pgd, 0);
+#endif
+
if (!HAVE_SHARED_KERNEL_PMD)
spin_lock_irqsave(&pgd_lock, flags);
@@ -349,16 +355,17 @@
if (!pte_write(*ptep)) {
xen_pgd_unpin(__pa(pgd));
- HYPERVISOR_update_va_mapping(
+ BUG_ON(HYPERVISOR_update_va_mapping(
(unsigned long)pgd,
pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
- 0);
+ 0));
}
/* in the PAE case user pgd entries are overwritten before usage */
if (PTRS_PER_PMD > 1) {
for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
+ make_page_writable(pmd);
kmem_cache_free(pmd_cache, pmd);
}
if (!HAVE_SHARED_KERNEL_PMD) {
@@ -444,9 +451,9 @@
if (PageHighMem(page))
return;
- HYPERVISOR_update_va_mapping(
+ BUG_ON(HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
- pfn_pte(pfn, flags), 0);
+ pfn_pte(pfn, flags), 0));
}
static void mm_walk(struct mm_struct *mm, pgprot_t flags)
@@ -485,10 +492,10 @@
spin_lock(&mm->page_table_lock);
mm_walk(mm, PAGE_KERNEL_RO);
- HYPERVISOR_update_va_mapping(
+ BUG_ON(HYPERVISOR_update_va_mapping(
(unsigned long)mm->pgd,
pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
- UVMF_TLB_FLUSH);
+ UVMF_TLB_FLUSH));
xen_pgd_pin(__pa(mm->pgd));
mm->context.pinned = 1;
spin_lock(&mm_unpinned_lock);
@@ -503,9 +510,9 @@
spin_lock(&mm->page_table_lock);
xen_pgd_unpin(__pa(mm->pgd));
- HYPERVISOR_update_va_mapping(
+ BUG_ON(HYPERVISOR_update_va_mapping(
(unsigned long)mm->pgd,
- pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0);
+ pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0));
mm_walk(mm, PAGE_KERNEL);
xen_tlb_flush();
mm->context.pinned = 0;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Thu Aug 25 22:53:20 2005
@@ -17,7 +17,7 @@
c-pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
pci-$(CONFIG_X86_VISWS) :=
c-pci-$(CONFIG_X86_NUMAQ) := numa.o
-pci-$(CONFIG_X86_NUMAQ) := irq.o
+l-pci-$(CONFIG_X86_NUMAQ) := irq.o
obj-y += $(pci-y)
c-obj-y += $(c-pci-y) common.o
@@ -27,6 +27,7 @@
$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
@ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
-obj-y += $(c-obj-y) $(l-pci-y)
+# Make sure irq.o gets linked in before common.o
+obj-y += $(patsubst common.o,$(l-pci-y) common.o,$(c-obj-y))
clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Thu Aug 25 22:53:20 2005
@@ -116,9 +116,9 @@
#elif defined (__x86_64__)
#define IRQ_REG orig_rax
#endif
-#define do_IRQ(irq, regs) do { \
- (regs)->IRQ_REG = (irq); \
- do_IRQ((regs)); \
+#define do_IRQ(irq, regs) do { \
+ (regs)->IRQ_REG = (irq); \
+ do_IRQ((regs)); \
} while (0)
#endif
@@ -137,14 +137,14 @@
/* NB. Interrupts are disabled on entry. */
asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
{
- u32 l1, l2;
+ u32 l1, l2;
unsigned int l1i, l2i, port;
int irq, cpu = smp_processor_id();
shared_info_t *s = HYPERVISOR_shared_info;
vcpu_info_t *vcpu_info = &s->vcpu_data[cpu];
vcpu_info->evtchn_upcall_pending = 0;
-
+
/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
while ( l1 != 0 )
@@ -158,9 +158,9 @@
l2 &= ~(1 << l2i);
port = (l1i << 5) + l2i;
- if ( (irq = evtchn_to_irq[port]) != -1 )
+ if ( (irq = evtchn_to_irq[port]) != -1 ) {
do_IRQ(irq, regs);
- else
+ } else
evtchn_device_upcall(port);
}
}
@@ -229,13 +229,14 @@
if ( HYPERVISOR_event_channel_op(&op) != 0 )
panic("Failed to unbind virtual IRQ %d\n", virq);
- /* This is a slight hack. Interdomain ports can be allocated
- directly by userspace, and at that point they get bound by
- Xen to vcpu 0. We therefore need to make sure that if we
- get an event on an event channel we don't know about vcpu 0
- handles it. Binding channels to vcpu 0 when closing them
- achieves this. */
- bind_evtchn_to_cpu(evtchn, 0);
+ /*
+ * This is a slight hack. Interdomain ports can be allocated directly
+ * by userspace, and at that point they get bound by Xen to vcpu 0. We
+ * therefore need to make sure that if we get an event on an event
+ * channel we don't know about vcpu 0 handles it. Binding channels to
+ * vcpu 0 when closing them achieves this.
+ */
+ bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
irq_to_evtchn[irq] = -1;
per_cpu(virq_to_irq, cpu)[virq] = -1;
@@ -244,7 +245,75 @@
spin_unlock(&irq_mapping_update_lock);
}
-int bind_ipi_on_cpu_to_irq(int ipi)
+/* This is only used when restoring a vcpu from an xm save. The ipi is
+ expected to have been bound before we suspended, and so all of the
+ xenolinux state is set up; we only need to restore the Xen side of
+ things. The irq number has to be the same, but the evtchn number can
+ change.
+
+ ipi:  index into the per-cpu ipi_to_evtchn table
+ vcpu: cpu whose ipi_to_evtchn entry is updated and to which the new
+       event channel is bound
+ irq:  existing irq number that must keep referring to this IPI */
+void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
+{
+ evtchn_op_t op;
+ int evtchn;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ /* Ask Xen for an IPI event channel; failure here is fatal. */
+ /* NOTE(review): no IN field of op.u.bind_ipi is filled in before the
+    hypercall (compare _bind_virq_to_irq, which sets op.u.bind_virq.virq)
+    -- confirm the target vcpu does not have to be passed to Xen. */
+ op.cmd = EVTCHNOP_bind_ipi;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
+ evtchn = op.u.bind_ipi.port;
+
+ /* Diagnostic output: previous and newly allocated port for this IPI. */
+ printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
+ ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
+ evtchn);
+
+ /* Drop the mapping between irq and its previous event channel ... */
+ evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+ irq_to_evtchn[irq] = -1;
+
+ /* ... and point the same irq number at the new event channel. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn,
+ evtchn_to_irq[evtchn]);
+ per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
+
+ bind_evtchn_to_cpu(evtchn, vcpu);
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ /* Clear the new port's bits in the shared-info mask and pending
+    bitmaps, outside the mapping lock. */
+ clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
+ clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
+}
+
+/*
+ * Re-bind virq on cpu to a freshly allocated event channel while keeping
+ * the existing irq number: the old evtchn <-> irq mapping for irq is
+ * dropped and replaced by one for the port returned by Xen.
+ * NOTE(review): presumably the VIRQ counterpart of _bind_ipi_to_irq for
+ * the restore path -- confirm against the callers.
+ */
+void _bind_virq_to_irq(int virq, int cpu, int irq)
+{
+ evtchn_op_t op;
+ int evtchn;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ /* Ask Xen to bind the virq; failure here is fatal. */
+ op.cmd = EVTCHNOP_bind_virq;
+ op.u.bind_virq.virq = virq;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IRQ %d\n", virq);
+ evtchn = op.u.bind_virq.port;
+
+ /* Forget the event channel irq used to be mapped to ... */
+ evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+ irq_to_evtchn[irq] = -1;
+
+ /* ... and map irq to the new one. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+ bind_evtchn_to_cpu(evtchn, cpu);
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ /* Clear the new port's bits in the shared-info mask and pending
+    bitmaps, outside the mapping lock. */
+ clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
+ clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
+}
+
+int bind_ipi_to_irq(int ipi)
{
evtchn_op_t op;
int evtchn, irq;
@@ -269,7 +338,7 @@
}
else
{
- irq = evtchn_to_irq[evtchn];
+ irq = evtchn_to_irq[evtchn];
}
irq_bindcount[irq]++;
@@ -284,29 +353,29 @@
evtchn_op_t op;
int cpu = smp_processor_id();
int evtchn = per_cpu(ipi_to_evtchn, cpu)[ipi];
- int irq = irq_to_evtchn[evtchn];
+ int irq = evtchn_to_irq[evtchn];
spin_lock(&irq_mapping_update_lock);
if ( --irq_bindcount[irq] == 0 )
{
- op.cmd = EVTCHNOP_close;
- op.u.close.dom = DOMID_SELF;
- op.u.close.port = evtchn;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("Failed to unbind virtual IPI %d on cpu %d\n", ipi, cpu);
-
- /* See comments in unbind_virq_from_irq */
- bind_evtchn_to_cpu(evtchn, 0);
+ op.cmd = EVTCHNOP_close;
+ op.u.close.dom = DOMID_SELF;
+ op.u.close.port = evtchn;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to unbind virtual IPI %d on cpu %d\n", ipi, cpu);
+
+ /* See comments in unbind_virq_from_irq */
+ bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
irq_to_evtchn[irq] = -1;
- per_cpu(ipi_to_evtchn, cpu)[ipi] = 0;
+ per_cpu(ipi_to_evtchn, cpu)[ipi] = 0;
}
spin_unlock(&irq_mapping_update_lock);
}
-int bind_evtchn_to_irq(int evtchn)
+int bind_evtchn_to_irq(unsigned int evtchn)
{
int irq;
@@ -326,7 +395,7 @@
return irq;
}
-void unbind_evtchn_from_irq(int evtchn)
+void unbind_evtchn_from_irq(unsigned int evtchn)
{
int irq = evtchn_to_irq[evtchn];
@@ -341,9 +410,36 @@
spin_unlock(&irq_mapping_update_lock);
}
+/*
+ * Bind evtchn to an irq and install handler on that irq in one step.
+ *
+ * irqflags, devname and dev_id are passed straight through to
+ * request_irq().  Returns 0 on success; if request_irq() fails, the
+ * event channel is unbound again and request_irq()'s error is returned.
+ */
+int bind_evtchn_to_irqhandler(
+    unsigned int evtchn,
+    irqreturn_t (*handler)(int, void *, struct pt_regs *),
+    unsigned long irqflags,
+    const char *devname,
+    void *dev_id)
+{
+    unsigned int irq = bind_evtchn_to_irq(evtchn);
+    int rc = request_irq(irq, handler, irqflags, devname, dev_id);
+
+    if ( rc == 0 )
+        return 0;
+
+    /* Handler could not be installed: undo the binding made above. */
+    unbind_evtchn_from_irq(evtchn);
+    return rc;
+}
+
+/*
+ * Counterpart of bind_evtchn_to_irqhandler(): release the handler
+ * registered with dev_id on the irq evtchn maps to, then unbind evtchn.
+ */
+void unbind_evtchn_from_irqhandler(unsigned int evtchn, void *dev_id)
+{
+    /* The irq must be looked up before the mapping is torn down. */
+    free_irq(evtchn_to_irq[evtchn], dev_id);
+    unbind_evtchn_from_irq(evtchn);
+}
+
+#ifdef CONFIG_SMP
static void do_nothing_function(void *ign)
{
}
+#endif
/* Rebind an evtchn so that it gets delivered to a specific cpu */
static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
@@ -354,38 +450,37 @@
spin_lock(&irq_mapping_update_lock);
evtchn = irq_to_evtchn[irq];
if (!VALID_EVTCHN(evtchn)) {
- spin_unlock(&irq_mapping_update_lock);
- return;
- }
-
- /* Tell Xen to send future instances of this interrupt to the
- other vcpu */
+ spin_unlock(&irq_mapping_update_lock);
+ return;
+ }
+
+ /* Tell Xen to send future instances of this interrupt to other vcpu. */
op.cmd = EVTCHNOP_bind_vcpu;
op.u.bind_vcpu.port = evtchn;
op.u.bind_vcpu.vcpu = tcpu;
- /* If this fails, it usually just indicates that we're dealing
- with a virq or IPI channel, which don't actually need to be
- rebound. Ignore it, but don't do the xenlinux-level rebind
- in that case. */
+ /*
+ * If this fails, it usually just indicates that we're dealing with a virq
+ * or IPI channel, which don't actually need to be rebound. Ignore it,
+ * but don't do the xenlinux-level rebind in that case.
+ */
if (HYPERVISOR_event_channel_op(&op) >= 0)
- bind_evtchn_to_cpu(evtchn, tcpu);
+ bind_evtchn_to_cpu(evtchn, tcpu);
spin_unlock(&irq_mapping_update_lock);
- /* Now send the new target processor a NOP IPI. When this
- returns, it will check for any pending interrupts, and so
- service any that got delivered to the wrong processor by
- mistake. */
- /* XXX: The only time this is called with interrupts disabled is
- from the hotplug/hotunplug path. In that case, all cpus are
- stopped with interrupts disabled, and the missed interrupts
- will be picked up when they start again. This is kind of a
- hack.
- */
- if (!irqs_disabled()) {
- smp_call_function(do_nothing_function, NULL, 0, 0);
- }
+ /*
+ * Now send the new target processor a NOP IPI. When this returns, it
+ * will check for any pending interrupts, and so service any that got
+ * delivered to the wrong processor by mistake.
+ *
+ * XXX: The only time this is called with interrupts disabled is from the
+ * hotplug/hotunplug path. In that case, all cpus are stopped with
+ * interrupts disabled, and the missed interrupts will be picked up when
+ * they start again. This is kind of a hack.
+ */
+ if (!irqs_disabled())
+ smp_call_function(do_nothing_function, NULL, 0, 0);
}
@@ -585,6 +680,16 @@
set_affinity_irq
};
+/*
+ * Re-raise irq i by setting the pending bit of its event channel in the
+ * shared-info page.  Does nothing when the irq has no valid event
+ * channel.  The channel is required to be masked at this point (BUG
+ * otherwise).  The h argument is not used.
+ */
+void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
+{
+    shared_info_t *shinfo = HYPERVISOR_shared_info;
+    int port = irq_to_evtchn[i];
+
+    if ( VALID_EVTCHN(port) )
+    {
+        BUG_ON(!synch_test_bit(port, &shinfo->evtchn_mask[0]));
+        synch_set_bit(port, &shinfo->evtchn_pending[0]);
+    }
+}
+
void irq_suspend(void)
{
int pirq, virq, irq, evtchn;
@@ -631,7 +736,7 @@
evtchn = op.u.bind_virq.port;
/* Record the new mapping. */
- bind_evtchn_to_cpu(evtchn, 0);
+ bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = irq;
irq_to_evtchn[irq] = evtchn;
@@ -655,9 +760,9 @@
#endif
for ( cpu = 0; cpu < NR_CPUS; cpu++ ) {
- /* No VIRQ -> IRQ mappings. */
- for ( i = 0; i < NR_VIRQS; i++ )
- per_cpu(virq_to_irq, cpu)[i] = -1;
+ /* No VIRQ -> IRQ mappings. */
+ for ( i = 0; i < NR_VIRQS; i++ )
+ per_cpu(virq_to_irq, cpu)[i] = -1;
}
/* No event-channel -> IRQ mappings. */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Thu Aug 25 22:53:20 2005
@@ -34,44 +34,90 @@
EXPORT_SYMBOL(gnttab_grant_foreign_access);
+EXPORT_SYMBOL(gnttab_end_foreign_access_ref);
EXPORT_SYMBOL(gnttab_end_foreign_access);
EXPORT_SYMBOL(gnttab_query_foreign_access);
EXPORT_SYMBOL(gnttab_grant_foreign_transfer);
+EXPORT_SYMBOL(gnttab_end_foreign_transfer_ref);
EXPORT_SYMBOL(gnttab_end_foreign_transfer);
EXPORT_SYMBOL(gnttab_alloc_grant_references);
EXPORT_SYMBOL(gnttab_free_grant_references);
+EXPORT_SYMBOL(gnttab_free_grant_reference);
EXPORT_SYMBOL(gnttab_claim_grant_reference);
EXPORT_SYMBOL(gnttab_release_grant_reference);
EXPORT_SYMBOL(gnttab_grant_foreign_access_ref);
EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref);
-static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
+#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1)
+
+static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
+static int gnttab_free_count = NR_GRANT_ENTRIES;
static grant_ref_t gnttab_free_head;
+static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
static grant_entry_t *shared;
-/*
- * Lock-free grant-entry allocator
- */
-
-static inline int
-get_free_entry(
- void)
-{
- grant_ref_t fh, nfh = gnttab_free_head;
- do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
- while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
- gnttab_free_list[fh])) != fh) );
- return fh;
+static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
+
+/*
+ * Detach a chain of count grant references from the head of the free
+ * list.
+ *
+ * On success returns the first reference of the chain; the remaining
+ * ones are reached through gnttab_list[] and the last one is terminated
+ * with GNTTAB_LIST_END.  Returns -1 when count is less than 1 or when
+ * fewer than count references are free.
+ */
+static int
+get_free_entries(int count)
+{
+    unsigned long flags;
+    int ref;
+    grant_ref_t head;
+
+    /*
+     * A request for no entries must not touch the list: the code below
+     * always unlinks at least one entry, which would desynchronise the
+     * list from gnttab_free_count.
+     */
+    if (count < 1)
+        return -1;
+
+    spin_lock_irqsave(&gnttab_list_lock, flags);
+    if (gnttab_free_count < count) {
+        spin_unlock_irqrestore(&gnttab_list_lock, flags);
+        return -1;
+    }
+    ref = head = gnttab_free_head;
+    gnttab_free_count -= count;
+    /* Advance head to the last entry of the chain being handed out. */
+    while (count-- > 1)
+        head = gnttab_list[head];
+    /* Cut the chain out of the free list and terminate it. */
+    gnttab_free_head = gnttab_list[head];
+    gnttab_list[head] = GNTTAB_LIST_END;
+    spin_unlock_irqrestore(&gnttab_list_lock, flags);
+    return ref;
+}
+
+#define get_free_entry() get_free_entries(1)
+
+/*
+ * Run every queued free-callback whose requested count can now be met.
+ *
+ * The pending list is detached first; each entry is then either invoked
+ * (its next pointer is cleared before fn is called) or pushed back onto
+ * gnttab_free_callback_list if gnttab_free_count is still too small.
+ * The callers visible here invoke this via check_free_callbacks() while
+ * holding gnttab_list_lock, so fn runs under that lock.
+ */
+static void
+do_free_callbacks(void)
+{
+ struct gnttab_free_callback *callback = gnttab_free_callback_list, *next;
+ gnttab_free_callback_list = NULL;
+ while (callback) {
+ /* Save the successor: callback->next is overwritten below. */
+ next = callback->next;
+ if (gnttab_free_count >= callback->count) {
+ callback->next = NULL;
+ callback->fn(callback->arg);
+ } else {
+ /* Not enough free entries yet: keep it queued. */
+ callback->next = gnttab_free_callback_list;
+ gnttab_free_callback_list = callback;
+ }
+ callback = next;
+ }
}
static inline void
-put_free_entry(
- grant_ref_t ref)
-{
- grant_ref_t fh, nfh = gnttab_free_head;
- do { gnttab_free_list[ref] = fh = nfh; wmb(); }
- while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) );
+check_free_callbacks(void)
+{
+ if (unlikely(gnttab_free_callback_list))
+ do_free_callbacks();
+}
+
+/*
+ * Return a single grant reference to the head of the free list, bump
+ * gnttab_free_count, and give queued free-callbacks a chance to run.
+ * All of this happens under gnttab_list_lock with interrupts saved and
+ * disabled.
+ */
+static void
+put_free_entry(grant_ref_t ref)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ gnttab_list[ref] = gnttab_free_head;
+ gnttab_free_head = ref;
+ gnttab_free_count++;
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
/*
@@ -79,8 +125,7 @@
*/
int
-gnttab_grant_foreign_access(
- domid_t domid, unsigned long frame, int readonly)
+gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly)
{
int ref;
@@ -96,8 +141,8 @@
}
void
-gnttab_grant_foreign_access_ref(
- grant_ref_t ref, domid_t domid, unsigned long frame, int readonly)
+gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int readonly)
{
shared[ref].frame = frame;
shared[ref].domid = domid;
@@ -107,7 +152,7 @@
int
-gnttab_query_foreign_access( grant_ref_t ref )
+gnttab_query_foreign_access(grant_ref_t ref)
{
u16 nflags;
@@ -117,7 +162,7 @@
}
void
-gnttab_end_foreign_access( grant_ref_t ref, int readonly )
+gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
{
u16 flags, nflags;
@@ -127,13 +172,17 @@
printk(KERN_ALERT "WARNING: g.e. still in use!\n");
}
while ( (nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != flags );
-
+}
+
+/*
+ * End foreign access to ref via gnttab_end_foreign_access_ref() and then
+ * put the reference back on the free list.  Use the _ref variant
+ * directly to keep ownership of the reference.
+ */
+void
+gnttab_end_foreign_access(grant_ref_t ref, int readonly)
+{
+ gnttab_end_foreign_access_ref(ref, readonly);
put_free_entry(ref);
}
int
-gnttab_grant_foreign_transfer(
- domid_t domid, unsigned long pfn )
+gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
{
int ref;
@@ -149,8 +198,8 @@
}
void
-gnttab_grant_foreign_transfer_ref(
- grant_ref_t ref, domid_t domid, unsigned long pfn )
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+ unsigned long pfn)
{
shared[ref].frame = pfn;
shared[ref].domid = domid;
@@ -159,21 +208,13 @@
}
unsigned long
-gnttab_end_foreign_transfer(
- grant_ref_t ref)
+gnttab_end_foreign_transfer_ref(grant_ref_t ref)
{
unsigned long frame = 0;
u16 flags;
flags = shared[ref].flags;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- /*
- * But can't flags == (GTF_accept_transfer | GTF_transfer_completed)
- * if gnttab_donate executes without interruption???
- */
-#else
- ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-#endif
+
/*
* If a transfer is committed then wait for the frame address to appear.
* Otherwise invalidate the grant entry against future use.
@@ -183,65 +224,91 @@
while ( unlikely((frame = shared[ref].frame) == 0) )
cpu_relax();
+ return frame;
+}
+
+unsigned long
+gnttab_end_foreign_transfer(grant_ref_t ref)
+{
+ unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
put_free_entry(ref);
-
return frame;
}
void
-gnttab_free_grant_references( u16 count, grant_ref_t head )
-{
- /* TODO: O(N)...? */
- grant_ref_t to_die = 0, next = head;
- int i;
-
- for ( i = 0; i < count; i++ )
- {
- to_die = next;
- next = gnttab_free_list[next];
- put_free_entry( to_die );
+gnttab_free_grant_reference(grant_ref_t ref)
+{
+
+ put_free_entry(ref);
+}
+
+void
+gnttab_free_grant_references(grant_ref_t head)
+{
+ grant_ref_t ref;
+ unsigned long flags;
+ int count = 1;
+ if (head == GNTTAB_LIST_END)
+ return;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ ref = head;
+ while (gnttab_list[ref] != GNTTAB_LIST_END) {
+ ref = gnttab_list[ref];
+ count++;
}
-}
-
-int
-gnttab_alloc_grant_references( u16 count,
- grant_ref_t *head,
- grant_ref_t *terminal )
-{
- int i;
- grant_ref_t h = gnttab_free_head;
-
- for ( i = 0; i < count; i++ )
- if ( unlikely(get_free_entry() == -1) )
- goto not_enough_refs;
+ gnttab_list[ref] = gnttab_free_head;
+ gnttab_free_head = head;
+ gnttab_free_count += count;
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
+int
+gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
+{
+ int h = get_free_entries(count);
+
+ if (h == -1)
+ return -ENOSPC;
*head = h;
- *terminal = gnttab_free_head;
return 0;
-
-not_enough_refs:
- gnttab_free_head = h;
- return -ENOSPC;
-}
-
-int
-gnttab_claim_grant_reference( grant_ref_t *private_head,
- grant_ref_t terminal )
-{
- grant_ref_t g;
- if ( unlikely((g = *private_head) == terminal) )
+}
+
+int
+gnttab_claim_grant_reference(grant_ref_t *private_head)
+{
+ grant_ref_t g = *private_head;
+ if (unlikely(g == GNTTAB_LIST_END))
return -ENOSPC;
- *private_head = gnttab_free_list[g];
+ *private_head = gnttab_list[g];
return g;
}
void
-gnttab_release_grant_reference( grant_ref_t *private_head,
- grant_ref_t release )
-{
- gnttab_free_list[release] = *private_head;
+gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release)
+{
+ gnttab_list[release] = *private_head;
*private_head = release;
+}
+
+/*
+ * Queue callback so that fn(arg) is invoked once at least count grant
+ * references are free.  The request is checked immediately, so fn may
+ * run before this function returns (under gnttab_list_lock).
+ *
+ * A callback whose next pointer is non-NULL is taken to be queued
+ * already and is left untouched.
+ * NOTE(review): the element at the tail of the list has next == NULL
+ * while queued, so this test cannot recognise it; re-requesting that
+ * callback would link it in a second time -- confirm callers never do
+ * this.
+ */
+void
+gnttab_request_free_callback(struct gnttab_free_callback *callback,
+ void (*fn)(void *), void *arg, u16 count)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ if (callback->next)
+ goto out;
+ callback->fn = fn;
+ callback->arg = arg;
+ callback->count = count;
+ /* Push onto the head of the pending list. */
+ callback->next = gnttab_free_callback_list;
+ gnttab_free_callback_list = callback;
+ check_free_callbacks();
+ out:
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
/*
@@ -252,8 +319,9 @@
static struct proc_dir_entry *grant_pde;
-static int grant_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long data)
+static int
+grant_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+ unsigned long data)
{
int ret;
privcmd_hypercall_t hypercall;
@@ -291,8 +359,9 @@
ioctl: grant_ioctl,
};
-static int grant_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static int
+grant_read(char *page, char **start, off_t off, int count, int *eof,
+ void *data)
{
int len;
unsigned int i;
@@ -321,8 +390,9 @@
return len;
}
-static int grant_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+static int
+grant_write(struct file *file, const char __user *buffer, unsigned long count,
+ void *data)
{
/* TODO: implement this */
return -ENOSYS;
@@ -330,7 +400,8 @@
#endif /* CONFIG_PROC_FS */
-int gnttab_resume(void)
+int
+gnttab_resume(void)
{
gnttab_setup_table_t setup;
unsigned long frames[NR_GRANT_FRAMES];
@@ -349,7 +420,8 @@
return 0;
}
-int gnttab_suspend(void)
+int
+gnttab_suspend(void)
{
int i;
@@ -359,7 +431,8 @@
return 0;
}
-static int __init gnttab_init(void)
+static int __init
+gnttab_init(void)
{
int i;
@@ -368,7 +441,7 @@
shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
- gnttab_free_list[i] = i + 1;
+ gnttab_list[i] = i + 1;
#ifdef CONFIG_PROC_FS
/*
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Thu Aug 25 22:53:20 2005
@@ -1,7 +1,4 @@
-
#define __KERNEL_SYSCALLS__
-static int errno;
-#include <linux/errno.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -9,13 +6,23 @@
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/sysrq.h>
+#include <linux/stringify.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
-#include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
#include <asm-xen/hypervisor.h>
#include <asm-xen/xen-public/dom0_ops.h>
#include <asm-xen/linux-public/suspend.h>
#include <asm-xen/queues.h>
+#include <asm-xen/xenbus.h>
+#include <asm-xen/ctrl_if.h>
+#include <linux/cpu.h>
+#include <linux/kthread.h>
+
+#define SHUTDOWN_INVALID -1
+#define SHUTDOWN_POWEROFF 0
+#define SHUTDOWN_REBOOT 1
+#define SHUTDOWN_SUSPEND 2
void machine_restart(char * __unused)
{
@@ -51,30 +58,76 @@
*/
/* Ignore multiple shutdown requests. */
-static int shutting_down = -1;
-
-static void __do_suspend(void)
+static int shutting_down = SHUTDOWN_INVALID;
+
+#ifndef CONFIG_HOTPLUG_CPU
+#define cpu_down(x) (-EOPNOTSUPP)
+#define cpu_up(x) (-EOPNOTSUPP)
+#endif
+
+/*
+ * Capture the register context of vcpu into *ctxt via the vcpu_pickle
+ * hypercall (panics on failure) and convert the machine addresses it
+ * contains -- ctrlreg[3] and the GDT frame list -- to pseudo-physical
+ * ones.
+ */
+static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+ int r;
+ int gdt_pages;
+ r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
+ if (r != 0)
+ panic("pickling vcpu %d -> %d!\n", vcpu, r);
+
+ /* Translate from machine to physical addresses where necessary,
+ so that they can be translated to our new machine address space
+ after resume. libxc is responsible for doing this to vcpu0,
+ but we do it to the others. */
+ /* Round gdt_ents up to whole frames of 512 entries each
+    (presumably 8-byte descriptors in a 4K page -- confirm). */
+ gdt_pages = (ctxt->gdt_ents + 511) / 512;
+ ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
+ for (r = 0; r < gdt_pages; r++)
+ ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
+}
+
+void _restore_vcpu(int cpu);
+
+atomic_t vcpus_rebooting;
+
+/*
+ * Restart vcpu from a context previously filled in by
+ * save_vcpu_context().
+ *
+ * The saved context is patched so that the vcpu resumes in
+ * _restore_vcpu with its original eip pushed on its stack, the
+ * pseudo-physical addresses are converted back to machine addresses,
+ * and the vcpu is booted.  Returns 0 once vcpus_rebooting has dropped
+ * back to zero, or -1 if the boot hypercall fails.
+ */
+static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+ int r;
+ int gdt_pages = (ctxt->gdt_ents + 511) / 512;
+
+ /* This is kind of a hack, and implicitly relies on the fact that
+ the vcpu stops in a place where all of the call clobbered
+ registers are already dead. */
+ /* Push the saved eip onto the vcpu's stack and redirect execution to
+    _restore_vcpu.  NOTE(review): the 4-byte slot and esp/eip field
+    names look i386-specific -- confirm for other architectures. */
+ ctxt->user_regs.esp -= 4;
+ ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
+ ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
+
+ /* De-canonicalise. libxc handles this for vcpu 0, but we need
+ to do it for the other vcpus. */
+ ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
+ for (r = 0; r < gdt_pages; r++)
+ ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
+
+ /* Raise the flag before booting; it is polled below.  Presumably the
+    restarted vcpu clears it -- that code is not visible here. */
+ atomic_set(&vcpus_rebooting, 1);
+ r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
+ if (r != 0) {
+ printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
+ return -1;
+ }
+
+ /* Make sure we wait for the new vcpu to come up before trying to do
+ anything with it or starting the next one. */
+ while (atomic_read(&vcpus_rebooting))
+ barrier();
+
+ return 0;
+}
+
+static int __do_suspend(void *ignore)
{
int i, j;
suspend_record_t *suspend_record;
+ static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
/* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
/* XXX SMH: yes it would :-( */
-#ifdef CONFIG_XEN_BLKDEV_FRONTEND
- extern void blkdev_suspend(void);
- extern void blkdev_resume(void);
-#else
-#define blkdev_suspend() do{}while(0)
-#define blkdev_resume() do{}while(0)
-#endif
-
-#ifdef CONFIG_XEN_NETDEV_FRONTEND
- extern void netif_suspend(void);
- extern void netif_resume(void);
-#else
-#define netif_suspend() do{}while(0)
-#define netif_resume() do{}while(0)
-#endif
#ifdef CONFIG_XEN_USB_FRONTEND
extern void usbif_resume();
@@ -82,37 +135,88 @@
#define usbif_resume() do{}while(0)
#endif
-#ifdef CONFIG_XEN_BLKDEV_GRANT
extern int gnttab_suspend(void);
extern int gnttab_resume(void);
-#else
-#define gnttab_suspend() do{}while(0)
-#define gnttab_resume() do{}while(0)
-#endif
-
+
+#ifdef CONFIG_SMP
+ extern void smp_suspend(void);
+ extern void smp_resume(void);
+#endif
extern void time_suspend(void);
extern void time_resume(void);
extern unsigned long max_pfn;
extern unsigned int *pfn_to_mfn_frame_list;
+ cpumask_t prev_online_cpus, prev_present_cpus;
+ int err = 0;
+
+ BUG_ON(smp_processor_id() != 0);
+ BUG_ON(in_interrupt());
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
+ if (num_online_cpus() > 1) {
+ printk(KERN_WARNING "Can't suspend SMP guests without
CONFIG_HOTPLUG_CPU\n");
+ return -EOPNOTSUPP;
+ }
+#endif
+
suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL);
if ( suspend_record == NULL )
goto out;
+ /* Take all of the other cpus offline. We need to be careful not
+ to get preempted between the final test for num_online_cpus()
+ == 1 and disabling interrupts, since otherwise userspace could
+ bring another cpu online, and then we'd be stuffed. At the
+ same time, cpu_down can reschedule, so we need to enable
+ preemption while doing that. This kind of sucks, but should be
+ correct. */
+ /* (We don't need to worry about other cpus bringing stuff up,
+ since by the time num_online_cpus() == 1, there aren't any
+ other cpus) */
+ cpus_clear(prev_online_cpus);
+ preempt_disable();
+ while (num_online_cpus() > 1) {
+ preempt_enable();
+ for_each_online_cpu(i) {
+ if (i == 0)
+ continue;
+ err = cpu_down(i);
+ if (err != 0) {
+ printk(KERN_CRIT "Failed to take all CPUs down: %d.\n", err);
+ goto out_reenable_cpus;
+ }
+ cpu_set(i, prev_online_cpus);
+ }
+ preempt_disable();
+ }
+
suspend_record->nr_pfns = max_pfn; /* final number of pfns */
__cli();
+
+ preempt_enable();
+
+ cpus_clear(prev_present_cpus);
+ for_each_present_cpu(i) {
+ if (i == 0)
+ continue;
+ save_vcpu_context(i, &suspended_cpu_records[i]);
+ cpu_set(i, prev_present_cpus);
+ }
#ifdef __i386__
mm_pin_all();
kmem_cache_shrink(pgd_cache);
#endif
- netif_suspend();
-
- blkdev_suspend();
-
time_suspend();
+
+#ifdef CONFIG_SMP
+ smp_suspend();
+#endif
+
+ xenbus_suspend();
ctrl_if_suspend();
@@ -126,9 +230,11 @@
memcpy(&suspend_record->resume_info, &xen_start_info,
sizeof(xen_start_info));
- HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);
-
- shutting_down = -1;
+ /* We'll stop somewhere inside this hypercall. When it returns,
+ we'll start resuming after the restore. */
+ HYPERVISOR_suspend(virt_to_mfn(suspend_record));
+
+ shutting_down = SHUTDOWN_INVALID;
memcpy(&xen_start_info, &suspend_record->resume_info,
sizeof(xen_start_info));
@@ -142,10 +248,10 @@
for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
{
pfn_to_mfn_frame_list[j] =
- virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+ virt_to_mfn(&phys_to_machine_mapping[i]);
}
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
- virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
+ virt_to_mfn(pfn_to_mfn_frame_list);
gnttab_resume();
@@ -153,19 +259,36 @@
ctrl_if_resume();
+ xenbus_resume();
+
+#ifdef CONFIG_SMP
+ smp_resume();
+#endif
+
time_resume();
- blkdev_resume();
-
- netif_resume();
-
usbif_resume();
+ for_each_cpu_mask(i, prev_present_cpus) {
+ restore_vcpu_context(i, &suspended_cpu_records[i]);
+ }
+
__sti();
+
+ out_reenable_cpus:
+ for_each_cpu_mask(i, prev_online_cpus) {
+ j = cpu_up(i);
+ if (j != 0) {
+ printk(KERN_CRIT "Failed to bring cpu %d back up (%d).\n",
+ i, j);
+ err = j;
+ }
+ }
out:
if ( suspend_record != NULL )
free_page((unsigned long)suspend_record);
+ return err;
}
static int shutdown_process(void *__unused)
@@ -186,7 +309,7 @@
switch ( shutting_down )
{
- case CMSG_SHUTDOWN_POWEROFF:
+ case SHUTDOWN_POWEROFF:
if ( execve("/sbin/poweroff", poweroff_argv, envp) < 0 )
{
sys_reboot(LINUX_REBOOT_MAGIC1,
@@ -196,7 +319,7 @@
}
break;
- case CMSG_SHUTDOWN_REBOOT:
+ case SHUTDOWN_REBOOT:
if ( execve("/sbin/reboot", restart_argv, envp) < 0 )
{
sys_reboot(LINUX_REBOOT_MAGIC1,
@@ -207,16 +330,28 @@
break;
}
- shutting_down = -1; /* could try again */
+ shutting_down = SHUTDOWN_INVALID; /* could try again */
return 0;
}
+/*
+ * Create a kernel thread running f(arg), bind it to the given cpu and
+ * wake it up.
+ *
+ * Returns the new task on success.  If kthread_create() fails, its
+ * ERR_PTR() value is returned unchanged and nothing is bound or woken,
+ * so callers that care can test the result with IS_ERR().
+ */
+static struct task_struct *kthread_create_on_cpu(int (*f)(void *arg),
+						 void *arg,
+						 const char *name,
+						 int cpu)
+{
+	struct task_struct *p;
+
+	p = kthread_create(f, arg, name);
+	/* kthread_create() reports failure as an ERR_PTR; passing that to
+	   kthread_bind()/wake_up_process() would dereference a bogus task. */
+	if (IS_ERR(p))
+		return p;
+	kthread_bind(p, cpu);
+	wake_up_process(p);
+	return p;
+}
+
static void __shutdown_handler(void *unused)
{
int err;
- if ( shutting_down != CMSG_SHUTDOWN_SUSPEND )
+ if ( shutting_down != SHUTDOWN_SUSPEND )
{
err = kernel_thread(shutdown_process, NULL, CLONE_FS | CLONE_FILES);
if ( err < 0 )
@@ -224,46 +359,121 @@
}
else
{
- __do_suspend();
- }
-}
-
-static void shutdown_handler(ctrl_msg_t *msg, unsigned long id)
+ kthread_create_on_cpu(__do_suspend, NULL, "suspender", 0);
+ }
+}
+
+static void shutdown_handler(struct xenbus_watch *watch, const char *node)
{
static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
- if ( msg->subtype == CMSG_SHUTDOWN_SYSRQ )
- {
- int sysrq = ((shutdown_sysrq_t *)&msg->msg[0])->key;
-
+ char *str;
+
+ str = (char *)xenbus_read("control", "shutdown", NULL);
+ /* Ignore read errors. */
+ if (IS_ERR(str))
+ return;
+ if (strlen(str) == 0) {
+ kfree(str);
+ return;
+ }
+
+ xenbus_write("control", "shutdown", "", O_CREAT);
+
+ if (strcmp(str, "poweroff") == 0)
+ shutting_down = SHUTDOWN_POWEROFF;
+ else if (strcmp(str, "reboot") == 0)
+ shutting_down = SHUTDOWN_REBOOT;
+ else if (strcmp(str, "suspend") == 0)
+ shutting_down = SHUTDOWN_SUSPEND;
+ else {
+ printk("Ignoring shutdown request: %s\n", str);
+ shutting_down = SHUTDOWN_INVALID;
+ }
+
+ kfree(str);
+
+ if (shutting_down != SHUTDOWN_INVALID)
+ schedule_work(&shutdown_work);
+}
+
#ifdef CONFIG_MAGIC_SYSRQ
+static void sysrq_handler(struct xenbus_watch *watch, const char *node)
+{
+ char sysrq_key = '\0';
+
+ if (!xenbus_scanf("control", "sysrq", "%c", &sysrq_key)) {
+ printk(KERN_ERR "Unable to read sysrq code in control/sysrq\n");
+ return;
+ }
+
+ xenbus_printf("control", "sysrq", "%c", '\0');
+
+ if (sysrq_key != '\0') {
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- handle_sysrq(sysrq, NULL, NULL);
+ handle_sysrq(sysrq_key, NULL, NULL);
#else
- handle_sysrq(sysrq, NULL, NULL, NULL);
-#endif
-#endif
- }
- else if ( (shutting_down == -1) &&
- ((msg->subtype == CMSG_SHUTDOWN_POWEROFF) ||
- (msg->subtype == CMSG_SHUTDOWN_REBOOT) ||
- (msg->subtype == CMSG_SHUTDOWN_SUSPEND)) )
- {
- shutting_down = msg->subtype;
- schedule_work(&shutdown_work);
- }
- else
- {
- printk("Ignore spurious shutdown request\n");
- }
-
- ctrl_if_send_response(msg);
+ handle_sysrq(sysrq_key, NULL, NULL, NULL);
+#endif
+ }
+}
+#endif
+
+static struct xenbus_watch shutdown_watch = {
+ .node = "control/shutdown",
+ .callback = shutdown_handler
+};
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static struct xenbus_watch sysrq_watch = {
+ .node ="control/sysrq",
+ .callback = sysrq_handler
+};
+#endif
+
+static struct notifier_block xenstore_notifier;
+
+/* Setup our watcher
+ NB: Assumes xenbus_lock is held!
+*/
+static int setup_shutdown_watcher(struct notifier_block *notifier,
+ unsigned long event,
+ void *data)
+{
+ int err1 = 0;
+#ifdef CONFIG_MAGIC_SYSRQ
+ int err2 = 0;
+#endif
+
+ BUG_ON(down_trylock(&xenbus_lock) == 0);
+
+ err1 = register_xenbus_watch(&shutdown_watch);
+#ifdef CONFIG_MAGIC_SYSRQ
+ err2 = register_xenbus_watch(&sysrq_watch);
+#endif
+
+ if (err1) {
+ printk(KERN_ERR "Failed to set shutdown watcher\n");
+ }
+
+#ifdef CONFIG_MAGIC_SYSRQ
+ if (err2) {
+ printk(KERN_ERR "Failed to set sysrq watcher\n");
+ }
+#endif
+
+ return NOTIFY_DONE;
}
static int __init setup_shutdown_event(void)
{
- ctrl_if_register_receiver(CMSG_SHUTDOWN, shutdown_handler, 0);
+
+ xenstore_notifier.notifier_call = setup_shutdown_watcher;
+
+ register_xenstore_notifier(&xenstore_notifier);
+
return 0;
}
-__initcall(setup_shutdown_event);
+subsys_initcall(setup_shutdown_event);
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c Thu Aug 25 22:53:20 2005
@@ -5,8 +5,6 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
@@ -14,34 +12,86 @@
#include <linux/init.h>
#include <asm/io.h>
#include <asm/page.h>
-
-EXPORT_SYMBOL(__dev_alloc_skb);
+#include <asm-xen/hypervisor.h>
/* Referenced in netback.c. */
/*static*/ kmem_cache_t *skbuff_cachep;
-/* Size must be cacheline-aligned (alloc_skb uses SKB_DATA_ALIGN). */
-#define XEN_SKB_SIZE \
- ((PAGE_SIZE - sizeof(struct skb_shared_info)) & ~(SMP_CACHE_BYTES - 1))
+#define MAX_SKBUFF_ORDER 2
+static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask)
{
- struct sk_buff *skb;
- skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask);
- if ( likely(skb != NULL) )
- skb_reserve(skb, 16);
- return skb;
+ struct sk_buff *skb;
+ int order;
+
+ length = SKB_DATA_ALIGN(length + 16);
+ order = get_order(length + sizeof(struct skb_shared_info));
+ if (order > MAX_SKBUFF_ORDER) {
+ printk(KERN_ALERT "Attempt to allocate order %d skbuff. "
+ "Increase MAX_SKBUFF_ORDER.\n", order);
+ return NULL;
+ }
+
+ skb = alloc_skb_from_cache(
+ skbuff_order_cachep[order], length, gfp_mask);
+ if (skb != NULL)
+ skb_reserve(skb, 16);
+
+ return skb;
}
static void skbuff_ctor(void *buf, kmem_cache_t *cachep, unsigned long unused)
{
- scrub_pages(buf, 1);
+ int order = 0;
+
+ while (skbuff_order_cachep[order] != cachep)
+ order++;
+
+ if (order != 0)
+ xen_create_contiguous_region((unsigned long)buf, order);
+
+ scrub_pages(buf, 1 << order);
+}
+
+static void skbuff_dtor(void *buf, kmem_cache_t *cachep, unsigned long unused)
+{
+ int order = 0;
+
+ while (skbuff_order_cachep[order] != cachep)
+ order++;
+
+ if (order != 0)
+ xen_destroy_contiguous_region((unsigned long)buf, order);
}
static int __init skbuff_init(void)
{
- skbuff_cachep = kmem_cache_create(
- "xen-skb", PAGE_SIZE, PAGE_SIZE, 0, skbuff_ctor, NULL);
- return 0;
+ static char name[MAX_SKBUFF_ORDER + 1][20];
+ unsigned long size;
+ int order;
+
+ for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
+ size = PAGE_SIZE << order;
+ sprintf(name[order], "xen-skb-%lu", size);
+ skbuff_order_cachep[order] = kmem_cache_create(
+ name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
+ }
+
+ skbuff_cachep = skbuff_order_cachep[0];
+
+ return 0;
}
__initcall(skbuff_init);
+
+EXPORT_SYMBOL(__dev_alloc_skb);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig Thu Aug 25 22:53:20 2005
@@ -125,6 +125,10 @@
config X86_IO_APIC
bool
default XEN_PRIVILEGED_GUEST
+
+config X86_XEN_GENAPIC
+ bool
+ default XEN_PRIVILEGED_GUEST || SMP
config X86_LOCAL_APIC
bool
@@ -325,12 +329,12 @@
# need this always enabled with GART_IOMMU for the VIA workaround
config SWIOTLB
bool
- depends on GART_IOMMU
+ depends on PCI
default y
config DUMMY_IOMMU
bool
- depends on !GART_IOMMU && !SWIOTLB
+ depends on !GART_IOMMU
default y
help
Don't use IOMMU code. This will cause problems when you have more
than 4GB
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Makefile Thu Aug 25 22:53:20 2005
@@ -79,14 +79,15 @@
CFLAGS += $(xenflags-y)
AFLAGS += $(xenflags-y)
-prepare: include/asm-$(XENARCH)/asm_offset.h
-CLEAN_FILES += include/asm-$(XENARCH)/asm_offset.h
+prepare: include/asm-$(XENARCH)/asm_offsets.h
+CLEAN_FILES += include/asm-$(XENARCH)/asm_offsets.h
arch/$(XENARCH)/kernel/asm-offsets.s: include/asm include/.asm-ignore \
include/linux/version.h include/config/MARKER
+include/asm-$(XENARCH)/offset.h: arch/$(XENARCH)/kernel/asm-offsets.s
+ $(call filechk,gen-asm-offsets)
-include/asm-$(XENARCH)/asm_offset.h: arch/xen/x86_64/kernel/asm-offsets.s
- $(call filechk,gen-asm-offsets)
- ln -fsn asm_offset.h include/asm-$(XENARCH)/offset.h
-
+include/asm-$(XENARCH)/asm_offsets.h: include/asm-$(XENARCH)/offset.h
+ ln -fsn offset.h include/asm-$(XENARCH)/asm_offsets.h
+ ln -fsn offset.h include/asm-$(XENARCH)/asm_offset.h
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile Thu Aug 25
22:53:20 2005
@@ -36,8 +36,8 @@
$(call if_changed,syscall)
AFLAGS_vsyscall-int80.o = -m32 -I$(obj)
-AFLAGS_vsyscall-sysenter.o = -m32
-AFLAGS_vsyscall-syscall.o = -m32
+AFLAGS_vsyscall-sysenter.o = -m32 -I$(obj)
+AFLAGS_vsyscall-syscall.o = -m32 -I$(obj)
CFLAGS_ia32_ioctl.o += -Ifs/
s-link := vsyscall-syscall.o vsyscall-sysenter.o vsyscall-sigreturn.o
@@ -48,13 +48,11 @@
$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst
%.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
@ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@
-$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
../../i386/kernel/vsyscall-note.S
-$(obj)/vsyscall-sysenter.S: $(obj)/vsyscall-sigreturn.S
-$(obj)/vsyscall-syscall.S: $(obj)/vsyscall-sigreturn.S
+$(obj)/vsyscall-int80.o $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-syscall.o: \
+ $(obj)/vsyscall-sigreturn.S $(obj)/../../i386/kernel/vsyscall-note.S
-../../i386/kernel/vsyscall-note.S:
- @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@)
$(srctree)/arch/xen/i386/kernel/$(notdir $@)
- make -C arch/xen/i386/kernel vsyscall-note.S
+$(obj)/../../i386/kernel/vsyscall-note.S:
+ @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@
obj-y += $(c-obj-y) $(s-obj-y)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c Thu Aug 25
22:53:20 2005
@@ -128,8 +128,12 @@
#endif
return 0;
}
-
-__initcall(init_syscall32);
+
+/*
+ * This must be done early in case we have an initrd containing 32-bit
+ * binaries (e.g., hotplug). This could be pushed upstream to arch/x86_64.
+ */
+core_initcall(init_syscall32);
/* May not be __init: called during resume */
void syscall32_cpu_init(void)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Thu Aug 25
22:53:20 2005
@@ -25,30 +25,32 @@
c-obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
-#obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_SMP) += smp.o smpboot.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
-c-obj-$(CONFIG_X86_IO_APIC) += genapic.o genapic_cluster.o genapic_flat.o
+obj-$(CONFIG_X86_XEN_GENAPIC) += genapic.o genapic_xen.o
+c-obj-$(CONFIG_X86_IO_APIC) += genapic_cluster.o genapic_flat.o
#obj-$(CONFIG_PM) += suspend.o
#obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
#obj-$(CONFIG_CPU_FREQ) += cpufreq/
#obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
#obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
-c-obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
-#obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
+i386-obj-$(CONFIG_DUMMY_IOMMU) += pci-dma.o
+i386-obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
c-obj-$(CONFIG_MODULES) += module.o
-#obj-y += topology.o
+obj-y += topology.o
c-obj-y += intel_cacheinfo.o
bootflag-y += ../../../i386/kernel/bootflag.o
cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../../i386/kernel/cpuid.o
topology-y += ../../../i386/mach-default/topology.o
-swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o
+#swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../../i386/kernel/microcode.o
intel_cacheinfo-y += ../../../i386/kernel/cpu/intel_cacheinfo.o
quirks-y += ../../i386/kernel/quirks.o
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c Thu Aug 25
22:53:20 2005
@@ -45,10 +45,11 @@
void smp_local_timer_interrupt(struct pt_regs *regs)
{
+
+ profile_tick(CPU_PROFILING, regs);
+#ifndef CONFIG_XEN
int cpu = smp_processor_id();
- profile_tick(CPU_PROFILING, regs);
-#if 0
if (--per_cpu(prof_counter, cpu) <= 0) {
/*
* The multiplier may have changed since the last time we got
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c Thu Aug 25
22:53:20 2005
@@ -46,7 +46,7 @@
e820.nr_map++;
}
-#if 0
+#ifndef CONFIG_XEN
extern char _end[];
/*
@@ -235,7 +235,6 @@
}
}
-
void __init e820_print_map(char *who)
{
int i;
@@ -516,9 +515,31 @@
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
e820_print_map(who);
}
+
+#else /* CONFIG_XEN */
+extern unsigned long xen_override_max_pfn;
+extern union xen_start_info_union xen_start_info_union;
+/*
+ * Guest physical starts from 0.
+ */
+unsigned long __init e820_end_of_ram(void)
+{
+ unsigned long max_end_pfn = xen_start_info.nr_pages;
+
+ if ( xen_override_max_pfn < max_end_pfn)
+ xen_override_max_pfn = max_end_pfn;
+
+ return xen_override_max_pfn;
+}
+
+
+
+void __init e820_reserve_resources(void)
+{
+ return; /* Xen won't have reserved entries */
+}
+
#endif
-
-extern unsigned long xen_override_max_pfn;
void __init parse_memopt(char *p, char **from)
{
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c Wed Aug
24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c Thu Aug
25 22:53:20 2005
@@ -5,6 +5,8 @@
#include <linux/string.h>
#include <asm/io.h>
#include <asm/processor.h>
+
+#ifndef CONFIG_XEN
/* Simple VGA output */
@@ -59,7 +61,6 @@
.index = -1,
};
-#ifndef CONFIG_XEN
/* Serial functions loosely based on a similar package from Klaus P. Gerlicher
*/
static int early_serial_base = 0x3f8; /* ttyS0 */
@@ -148,7 +149,8 @@
outb((divisor >> 8) & 0xff, early_serial_base + DLH);
outb(c & ~DLAB, early_serial_base + LCR);
}
-#else
+
+#else /* CONFIG_XEN */
static void
early_serial_write(struct console *con, const char *s, unsigned count)
@@ -167,6 +169,13 @@
static __init void early_serial_init(char *s)
{
}
+
+/*
+ * No early VGA console on Xen, as we do not have convenient ISA-space
+ * mappings. Someone should fix this for domain 0. For now, use fake serial.
+ */
+#define early_vga_console early_serial_console
+
#endif
static struct console early_serial_console = {
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S Thu Aug 25
22:53:20 2005
@@ -128,7 +128,6 @@
* #define VGCF_IN_SYSCALL (1<<8)
*/
.macro SWITCH_TO_USER flag
- movl $0,%gs:pda_kernel_mode # change to user mode
subq $8*4,%rsp # reuse rip, cs, rflags, rsp, ss in
the stack
movq %rax,(%rsp)
movq %r11,1*8(%rsp)
@@ -139,7 +138,6 @@
.endm
.macro SWITCH_TO_KERNEL ssoff,adjust=0
- btsq $0,%gs:pda_kernel_mode
jc 1f
orb $1,\ssoff-\adjust+4(%rsp)
1:
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S Thu Aug 25
22:53:20 2005
@@ -28,8 +28,6 @@
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
-/* #include <asm/thread_info.h> */
-
/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
* because we need identity-mapped pages on setup so define __START_KERNEL to
@@ -43,10 +41,9 @@
startup_64:
ENTRY(_start)
cld
- movq init_rsp(%rip),%rsp
/* Copy the necessary stuff from xen_start_info structure. */
movq $xen_start_info_union,%rdi
- movq $64,%rcx /* sizeof (union xen_start_info_union) / sizeof
(long) */
+ movq $256,%rcx
rep movsq
#ifdef CONFIG_SMP
@@ -54,6 +51,7 @@
cld
#endif /* CONFIG_SMP */
+ movq init_rsp(%rip),%rsp
/* zero EFLAGS after setting rsp */
pushq $0
popfq
@@ -116,15 +114,81 @@
ENTRY(init_level4_user_pgt)
.fill 512,8,0
+ /*
+ * In Xen the following pre-initialized pgt entries are re-initialized.
+ */
+.org 0x3000
+ENTRY(level3_kernel_pgt)
+ .fill 510,8,0
+ /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+ .quad 0x0000000000105007 /* -> level2_kernel_pgt */
+ .fill 1,8,0
+
+.org 0x4000
+ENTRY(level2_ident_pgt)
+ /* 40MB for bootup. */
+ .quad 0x0000000000000283
+ .quad 0x0000000000200183
+ .quad 0x0000000000400183
+ .quad 0x0000000000600183
+ .quad 0x0000000000800183
+ .quad 0x0000000000A00183
+ .quad 0x0000000000C00183
+ .quad 0x0000000000E00183
+ .quad 0x0000000001000183
+ .quad 0x0000000001200183
+ .quad 0x0000000001400183
+ .quad 0x0000000001600183
+ .quad 0x0000000001800183
+ .quad 0x0000000001A00183
+ .quad 0x0000000001C00183
+ .quad 0x0000000001E00183
+ .quad 0x0000000002000183
+ .quad 0x0000000002200183
+ .quad 0x0000000002400183
+ .quad 0x0000000002600183
+ /* Temporary mappings for the super early allocator in
arch/x86_64/mm/init.c */
+ .globl temp_boot_pmds
+temp_boot_pmds:
+ .fill 492,8,0
+
+.org 0x5000
+ENTRY(level2_kernel_pgt)
+ /* 40MB kernel mapping. The kernel code cannot be bigger than that.
+ When you change this change KERNEL_TEXT_SIZE in page.h too. */
+ /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
+ .quad 0x0000000000000183
+ .quad 0x0000000000200183
+ .quad 0x0000000000400183
+ .quad 0x0000000000600183
+ .quad 0x0000000000800183
+ .quad 0x0000000000A00183
+ .quad 0x0000000000C00183
+ .quad 0x0000000000E00183
+ .quad 0x0000000001000183
+ .quad 0x0000000001200183
+ .quad 0x0000000001400183
+ .quad 0x0000000001600183
+ .quad 0x0000000001800183
+ .quad 0x0000000001A00183
+ .quad 0x0000000001C00183
+ .quad 0x0000000001E00183
+ .quad 0x0000000002000183
+ .quad 0x0000000002200183
+ .quad 0x0000000002400183
+ .quad 0x0000000002600183
+ /* Module mapping starts here */
+ .fill 492,8,0
+
/*
* This is used for vsyscall area mapping as we have a different
* level4 page table for user.
*/
-.org 0x3000
+.org 0x6000
ENTRY(level3_user_pgt)
.fill 512,8,0
-.org 0x4000
+.org 0x7000
ENTRY(cpu_gdt_table)
/* The TLS descriptors are currently at a different place compared to i386.
Hopefully nobody expects them at a fixed place (Wine?) */
@@ -140,26 +204,34 @@
.quad 0,0 /* TSS */
.quad 0,0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
-
-gdt_end:
+ .quad 0 /* unused now? __KERNEL16_CS - 16bit
PM for S3 wakeup. */
+
+gdt_end:
+#if 0
/* asm/segment.h:GDT_ENTRIES must match this */
/* This should be a multiple of the cache line size */
/* GDTs of other CPUs: */
.fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
-
-.org 0x5000
+#endif
+
+.org 0x8000
ENTRY(empty_zero_page)
-.org 0x6000
+.org 0x9000
ENTRY(empty_bad_page)
-.org 0x7000
+.org 0xa000
ENTRY(empty_bad_pte_table)
-.org 0x8000
+.org 0xb000
ENTRY(empty_bad_pmd_table)
- .org 0x9000
+.org 0xc000
+ENTRY(level3_physmem_pgt)
+ .quad 0x0000000000105007 /* -> level2_kernel_pgt (so
that __va works even before pagetable_init) */
+
+
+ .org 0xd000
#ifdef CONFIG_ACPI_SLEEP
ENTRY(wakeup_level4_pgt)
.quad 0x0000000000102007 /* -> level3_ident_pgt */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c Thu Aug 25
22:53:20 2005
@@ -30,7 +30,6 @@
unsigned int old_io_pl = current->thread.io_pl;
physdev_op_t op;
-
if (new_io_pl > 3)
return -EINVAL;
@@ -38,16 +37,12 @@
if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
return -EPERM;
- /* Maintain OS privileges even if user attempts to relinquish them. */
- if (new_io_pl == 0)
- new_io_pl = 1;
-
/* Change our version of the privilege levels. */
current->thread.io_pl = new_io_pl;
/* Force the change at ring 0. */
op.cmd = PHYSDEVOP_SET_IOPL;
- op.u.set_iopl.iopl = new_io_pl;
+ op.u.set_iopl.iopl = (new_io_pl == 0) ? 1 : new_io_pl;
HYPERVISOR_physdev_op(&op);
return 0;
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c Thu Aug 25 22:53:20 2005
@@ -21,6 +21,11 @@
atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
/*
* Generic, controller-independent functions:
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c Thu Aug 25
22:53:20 2005
@@ -61,6 +61,7 @@
EXPORT_SYMBOL(dma_free_coherent);
#endif
+#if 0
int dma_supported(struct device *hwdev, u64 mask)
{
/*
@@ -76,6 +77,7 @@
return 1;
}
EXPORT_SYMBOL(dma_supported);
+#endif
int dma_get_cache_alignment(void)
{
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c Thu Aug 25
22:53:20 2005
@@ -148,7 +148,6 @@
if (cpu_is_offline(cpu))
play_dead();
- __IRQ_STAT(cpu,idle_timestamp) = jiffies;
xen_idle();
}
@@ -454,8 +453,8 @@
#define C(i) do { \
if (unlikely(next->tls_array[i] != prev->tls_array[i])) { \
mcl->op = __HYPERVISOR_update_descriptor; \
- mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu) \
- [GDT_ENTRY_TLS_MIN + i]); \
+ mcl->args[0] = virt_to_machine( \
+ &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
mcl->args[1] = next->tls_array[i]; \
mcl++; \
} \
@@ -465,7 +464,7 @@
if (unlikely(prev->io_pl != next->io_pl)) {
iopl_op.cmd = PHYSDEVOP_SET_IOPL;
- iopl_op.u.set_iopl.iopl = next->io_pl;
+ iopl_op.u.set_iopl.iopl = (next->io_pl == 0) ? 1 : next->io_pl;
mcl->op = __HYPERVISOR_physdev_op;
mcl->args[0] = (unsigned long)&iopl_op;
mcl++;
@@ -475,7 +474,7 @@
iobmp_op.cmd =
PHYSDEVOP_SET_IOBITMAP;
iobmp_op.u.set_iobitmap.bitmap =
- (unsigned long)next->io_bitmap_ptr;
+ (char *)next->io_bitmap_ptr;
iobmp_op.u.set_iobitmap.nr_ports =
next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
mcl->op = __HYPERVISOR_physdev_op;
@@ -744,3 +743,9 @@
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
+
+#ifndef CONFIG_SMP
+void _restore_vcpu(void)
+{
+}
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Thu Aug 25
22:53:20 2005
@@ -40,7 +40,6 @@
#include <linux/acpi.h>
#include <linux/kallsyms.h>
#include <linux/edd.h>
-#include <linux/percpu.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -58,27 +57,36 @@
#include <asm/setup.h>
#include <asm/mach_apic.h>
#include <asm/numa.h>
+#ifdef CONFIG_XEN
+#include <linux/percpu.h>
#include <asm-xen/xen-public/physdev.h>
#include "setup_arch_pre.h"
#include <asm-xen/hypervisor.h>
-
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
-
+#define end_pfn_map end_pfn
#include <asm/mach-xen/setup_arch_post.h>
extern unsigned long start_pfn;
-
-#if 0
-struct edid_info {
- unsigned char dummy[128];
-};
-#endif
-
extern struct edid_info edid_info;
+
+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
/* Allows setting of maximum possible memory size */
unsigned long xen_override_max_pfn;
+
+u32 *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
+
+EXPORT_SYMBOL(phys_to_machine_mapping);
+
+DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]);
+DEFINE_PER_CPU(int, nr_multicall_ents);
+
+/* Raw start-of-day parameters from the hypervisor. */
+union xen_start_info_union xen_start_info_union;
+#endif
+
/*
* Machine setup..
*/
@@ -166,7 +174,7 @@
#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
static struct resource system_rom_resource = {
.name = "System ROM",
.start = 0xf0000,
@@ -200,12 +208,14 @@
#define ADAPTER_ROM_RESOURCES \
(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
static struct resource video_rom_resource = {
.name = "Video ROM",
.start = 0xc0000,
.end = 0xc7fff,
.flags = IORESOURCE_ROM,
};
+#endif
static struct resource video_ram_resource = {
.name = "Video RAM area",
@@ -214,7 +224,7 @@
.flags = IORESOURCE_RAM,
};
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
static int __init romchecksum(unsigned char *rom, unsigned long length)
@@ -292,33 +302,24 @@
}
#endif
-/*
- * Point at the empty zero page to start with. We map the real shared_info
- * page as soon as fixmap is up and running.
- */
-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
-EXPORT_SYMBOL(HYPERVISOR_shared_info);
-
-u32 *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
-
-EXPORT_SYMBOL(phys_to_machine_mapping);
-
-DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]);
-DEFINE_PER_CPU(int, nr_multicall_ents);
-
-/* Raw start-of-day parameters from the hypervisor. */
-union xen_start_info_union xen_start_info_union;
static __init void parse_cmdline_early (char ** cmdline_p)
{
char c = ' ', *to = command_line, *from = COMMAND_LINE;
- int len = 0, max_cmdline;
-
+ int len = 0;
+
+ /* Save unparsed command line copy for /proc/cmdline */
+#ifdef CONFIG_XEN
+ int max_cmdline;
+
if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
max_cmdline = COMMAND_LINE_SIZE;
memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline);
- /* Save unparsed command line copy for /proc/cmdline */
saved_command_line[max_cmdline-1] = '\0';
+#else
+ memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
+ saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+#endif
for (;;) {
if (c != ' ')
@@ -376,8 +377,7 @@
acpi_skip_timer_override = 1;
#endif
#endif
-
-#if 0
+#ifndef CONFIG_XEN
if (!memcmp(from, "nolapic", 7) ||
!memcmp(from, "disableapic", 11))
disable_apic = 1;
@@ -389,8 +389,7 @@
skip_ioapic_setup = 0;
ioapic_force = 1;
}
-#endif
-
+#endif
if (!memcmp(from, "mem=", 4))
parse_memopt(from+4, &from);
@@ -424,34 +423,28 @@
}
#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_XEN
static void __init contig_initmem_init(void)
{
+ unsigned long bootmap_size = init_bootmem(start_pfn, end_pfn);
+ free_bootmem(0, end_pfn << PAGE_SHIFT);
+ reserve_bootmem(HIGH_MEMORY,
+ (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1)
+ - HIGH_MEMORY);
+}
+#else
+static void __init contig_initmem_init(void)
+{
unsigned long bootmap_size, bootmap;
-
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
-
bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
- bootmap = start_pfn;
- bootmap_size = init_bootmem(bootmap, end_pfn);
+ bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
+ if (bootmap == -1L)
+ panic("Cannot find bootmem map of size %ld\n",bootmap_size);
+ bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
+ e820_bootmem_free(&contig_page_data, 0, end_pfn << PAGE_SHIFT);
reserve_bootmem(bootmap, bootmap_size);
-
- free_bootmem(start_pfn << PAGE_SHIFT, (end_pfn - start_pfn) <<
PAGE_SHIFT);
- printk("Registering memory for bootmem: from %lx, size = %lx\n",
- start_pfn << PAGE_SHIFT, (end_pfn - start_pfn) <<
PAGE_SHIFT);
- /*
- * This should cover kernel_end
- */
-#if 0
- reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
- bootmap_size + PAGE_SIZE-1) -
(HIGH_MEMORY));
-#endif
- reserve_bootmem(0, (PFN_PHYS(start_pfn) +
- bootmap_size + PAGE_SIZE-1));
-
}
+#endif /* !CONFIG_XEN */
#endif
/* Use inline assembly to define this because the nops are defined
@@ -543,7 +536,7 @@
}
#endif
-#if 0
+#ifndef CONFIG_XEN
#define EBDA_ADDR_POINTER 0x40E
static void __init reserve_ebda_region(void)
{
@@ -559,73 +552,17 @@
}
#endif
-/*
- * Guest physical starts from 0.
- */
-
-unsigned long __init xen_end_of_ram(void)
-{
- unsigned long max_end_pfn = xen_start_info.nr_pages;
-
- if ( xen_override_max_pfn < max_end_pfn)
- xen_override_max_pfn = max_end_pfn;
-
- return xen_override_max_pfn;
-}
-
-static void __init print_memory_map(char *who)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- early_printk(" %s: %016Lx - %016Lx ", who,
- e820.map[i].addr,
- e820.map[i].addr + e820.map[i].size);
- switch (e820.map[i].type) {
- case E820_RAM: early_printk("(usable)\n");
- break;
- case E820_RESERVED:
- early_printk("(reserved)\n");
- break;
- case E820_ACPI:
- early_printk("(ACPI data)\n");
- break;
- case E820_NVS:
- early_printk("(ACPI NVS)\n");
- break;
- default: early_printk("type %u\n", e820.map[i].type);
- break;
- }
- }
-}
-
void __init setup_arch(char **cmdline_p)
{
- int i, j;
- physdev_op_t op;
-
-#if 0
- ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
-#else
+ unsigned long kernel_end;
+
+#ifdef CONFIG_XEN
ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
-#endif
drive_info = DRIVE_INFO;
-
+ kernel_end = 0; /* dummy */
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
screen_info = SCREEN_INFO;
-#endif
- edid_info = EDID_INFO;
- saved_video_mode = SAVED_VIDEO_MODE;
- bootloader_type = LOADER_TYPE;
-
-#ifdef CONFIG_BLK_DEV_RAM
- rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
-#endif
-/* register_console(&xen_console); */
-
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+
/* This is drawn from a dump from vgacon:startup in standard Linux. */
screen_info.orig_video_mode = 3;
screen_info.orig_video_isVGA = 1;
@@ -633,27 +570,56 @@
screen_info.orig_video_cols = 80;
screen_info.orig_video_ega_bx = 3;
screen_info.orig_video_points = 16;
-#endif
+#endif
+ edid_info = EDID_INFO;
+ saved_video_mode = SAVED_VIDEO_MODE;
+ bootloader_type = LOADER_TYPE;
+
+#ifdef CONFIG_BLK_DEV_RAM
+ rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+ rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+ rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+
+
+#endif
+
+ HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ VMASST_TYPE_writable_pagetables);
+
ARCH_SETUP
- print_memory_map(machine_specific_memory_setup());
-
- /* copy_edd(); */
+#else
+ ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
+ drive_info = DRIVE_INFO;
+ screen_info = SCREEN_INFO;
+ edid_info = EDID_INFO;
+ saved_video_mode = SAVED_VIDEO_MODE;
+ bootloader_type = LOADER_TYPE;
+
+#ifdef CONFIG_BLK_DEV_RAM
+ rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+ rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+ rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+ setup_memory_region();
+ copy_edd();
+#endif /* !CONFIG_XEN */
if (!MOUNT_ROOT_RDONLY)
root_mountflags &= ~MS_RDONLY;
init_mm.start_code = (unsigned long) &_text;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
-/* init_mm.brk = (unsigned long) &_end; */
+#ifdef CONFIG_XEN
init_mm.brk = start_pfn << PAGE_SHIFT;
-
-
-#if 0 /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
+#else
+ init_mm.brk = (unsigned long) &_end;
+
code_resource.start = virt_to_phys(&_text);
code_resource.end = virt_to_phys(&_etext)-1;
data_resource.start = virt_to_phys(&_etext);
data_resource.end = virt_to_phys(&_edata)-1;
#endif
+
parse_cmdline_early(cmdline_p);
early_identify_cpu(&boot_cpu_data);
@@ -662,15 +628,11 @@
* partially used pages are not usable - thus
* we are rounding upwards:
*/
-#if 0
end_pfn = e820_end_of_ram();
-#else
- end_pfn = xen_end_of_ram();
-#endif
check_efer();
- init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
+ init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
#ifdef CONFIG_ACPI_NUMA
/*
@@ -685,24 +647,25 @@
contig_initmem_init();
#endif
- /* Reserve direct mapping and shared info etc. */
-// reserve_bootmem_generic(table_start << PAGE_SHIFT, (table_end + 1 - table_start) << PAGE_SHIFT);
-
-// reserve_bootmem_generic(0, (table_end + 1) << PAGE_SHIFT);
+#ifndef CONFIG_XEN
+ /* Reserve direct mapping */
+ reserve_bootmem_generic(table_start << PAGE_SHIFT,
+ (table_end - table_start) << PAGE_SHIFT);
/* reserve kernel */
-// kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
-
-#if 0
+ kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
+ reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
+
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
*/
reserve_bootmem_generic(0, PAGE_SIZE);
-#endif
/* reserve ebda region */
-/* reserve_ebda_region(); */
+ reserve_ebda_region();
+#endif
+
#ifdef CONFIG_SMP
/*
@@ -722,6 +685,7 @@
*/
acpi_reserve_bootmem();
#endif
+#ifdef CONFIG_XEN
#ifdef CONFIG_BLK_DEV_INITRD
if (xen_start_info.mod_start) {
if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
@@ -738,6 +702,25 @@
}
}
#endif
+#else /* CONFIG_XEN */
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (LOADER_TYPE && INITRD_START) {
+ if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
+ reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
+ initrd_start =
+ INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+ initrd_end = initrd_start+INITRD_SIZE;
+ }
+ else {
+ printk(KERN_ERR "initrd extends beyond end of memory "
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+ (unsigned long)(INITRD_START + INITRD_SIZE),
+ (unsigned long)(end_pfn << PAGE_SHIFT));
+ initrd_start = 0;
+ }
+ }
+#endif
+#endif /* !CONFIG_XEN */
paging_init();
#ifdef CONFIG_X86_LOCAL_APIC
/*
@@ -745,30 +728,36 @@
*/
find_smp_config();
#endif
- /* Make sure we have a large enough P->M table. */
- if (end_pfn > xen_start_info.nr_pages) {
- phys_to_machine_mapping = alloc_bootmem(
- max_pfn * sizeof(unsigned long));
- memset(phys_to_machine_mapping, ~0,
- max_pfn * sizeof(unsigned long));
- memcpy(phys_to_machine_mapping,
- (unsigned long *)xen_start_info.mfn_list,
- xen_start_info.nr_pages * sizeof(unsigned long));
- free_bootmem(
- __pa(xen_start_info.mfn_list),
- PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
- sizeof(unsigned long))));
- }
-
- pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE);
-
- for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
- {
- pfn_to_mfn_frame_list[j] =
- virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
- }
-
-#if 0
+#ifdef CONFIG_XEN
+ {
+ int i, j;
+ /* Make sure we have a large enough P->M table. */
+ if (end_pfn > xen_start_info.nr_pages) {
+ phys_to_machine_mapping = alloc_bootmem(
+ max_pfn * sizeof(u32));
+ memset(phys_to_machine_mapping, ~0,
+ max_pfn * sizeof(u32));
+ memcpy(phys_to_machine_mapping,
+ (u32 *)xen_start_info.mfn_list,
+ xen_start_info.nr_pages * sizeof(u32));
+ free_bootmem(
+ __pa(xen_start_info.mfn_list),
+ PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+ sizeof(u32))));
+ }
+
+ pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE);
+
+ for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(u32)), j++ )
+ {
+ pfn_to_mfn_frame_list[j] =
+ virt_to_mfn(&phys_to_machine_mapping[i]);
+ }
+
+ }
+#endif
+
+#ifndef CONFIG_XEN
check_ioapic();
#endif
@@ -784,6 +773,7 @@
*/
acpi_boot_init();
#endif
+
#ifdef CONFIG_X86_LOCAL_APIC
/*
* get boot-time SMP configuration:
@@ -795,18 +785,14 @@
#endif
#endif
- /* XXX Disable irqdebug until we have a way to avoid interrupt
- * conflicts. */
-/* noirqdebug_setup(""); */
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
/*
* Request address space for all standard RAM and ROM resources
* and also for regions reported as reserved by the e820.
*/
probe_roms();
-#endif
-/* e820_reserve_resources(); */
+ e820_reserve_resources();
+#endif
request_resource(&iomem_resource, &video_ram_resource);
@@ -823,14 +809,40 @@
iommu_hole_init();
#endif
- op.cmd = PHYSDEVOP_SET_IOPL;
- op.u.set_iopl.iopl = current->thread.io_pl = 1;
- HYPERVISOR_physdev_op(&op);
-
- if (xen_start_info.flags & SIF_INITDOMAIN) {
- if (!(xen_start_info.flags & SIF_PRIVILEGED))
- panic("Xen granted us console access "
- "but not privileged status");
+#ifdef CONFIG_XEN
+ {
+ physdev_op_t op;
+
+ op.cmd = PHYSDEVOP_SET_IOPL;
+ op.u.set_iopl.iopl = 1;
+ HYPERVISOR_physdev_op(&op);
+
+ if (xen_start_info.flags & SIF_INITDOMAIN) {
+ if (!(xen_start_info.flags & SIF_PRIVILEGED))
+ panic("Xen granted us console access "
+ "but not privileged status");
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+ conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+ conswitchp = &dummy_con;
+#endif
+#endif
+ } else {
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ extern const struct consw xennull_con;
+ extern int console_use_vt;
+#if defined(CONFIG_VGA_CONSOLE)
+ /* disable VGA driver */
+ ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
+#endif
+ conswitchp = &xennull_con;
+ console_use_vt = 0;
+#endif
+ }
+ }
+#else /* CONFIG_XEN */
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
@@ -839,18 +851,8 @@
conswitchp = &dummy_con;
#endif
#endif
- } else {
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- extern const struct consw xennull_con;
- extern int console_use_vt;
-#if defined(CONFIG_VGA_CONSOLE)
- /* disable VGA driver */
- ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
-#endif
- conswitchp = &xennull_con;
- console_use_vt = 0;
-#endif
- }
+
+#endif /* !CONFIG_XEN */
}
static int __init get_model_name(struct cpuinfo_x86 *c)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c Thu Aug 25 22:53:20 2005
@@ -30,9 +30,9 @@
#include <asm/proto.h>
#include <asm/mman.h>
#include <asm/numa.h>
-
+#ifdef CONFIG_XEN
#include <asm-xen/hypervisor.h>
-
+#endif
char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
@@ -123,16 +123,61 @@
}
}
+#ifdef CONFIG_XEN
+/*
+ * CONFIG_XEN variant: pass the physical addresses of init_level4_pgt and
+ * init_level4_user_pgt to xen_pt_switch() and xen_new_user_pt().
+ * NOTE(review): presumably these install the kernel and user page-table
+ * roots through the hypervisor -- the helpers are not visible here.
+ */
+static void switch_pt(void)
+{
+ xen_pt_switch(__pa(init_level4_pgt));
+ xen_new_user_pt(__pa(init_level4_user_pgt));
+}
+
+/*
+ * CONFIG_XEN variant: walk the GDT described by gdt_descr one page at a
+ * time, record each page's machine frame number, mark the page read-only,
+ * then hand the frame list and the entry count to HYPERVISOR_set_gdt().
+ * A non-zero hypercall result is fatal.
+ */
+void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
+{
+ unsigned long frames[16];
+ unsigned long page = gdt_descr->address;
+ unsigned long limit = gdt_descr->address + gdt_descr->size;
+ int nr = 0;
+
+ while (page < limit) {
+ frames[nr++] = virt_to_mfn(page);
+ make_page_readonly((void *)page);
+ page += PAGE_SIZE;
+ }
+
+ if (HYPERVISOR_set_gdt(frames,
+ gdt_descr->size / sizeof (struct desc_struct)))
+ BUG();
+}
+#else
+/*
+ * Native (!CONFIG_XEN) variant: load %cr3 with the physical address of
+ * init_level4_pgt.
+ */
+static void switch_pt(void)
+{
+ asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+}
+
+/*
+ * Native (!CONFIG_XEN) variant: load the calling CPU's GDT descriptor
+ * (cpu_gdt_descr[cpu]) with lgdt and the IDT descriptor with lidt.
+ * NOTE(review): the gdt_descr argument is not used in this variant; the
+ * descriptor is always taken from cpu_gdt_descr[] for the current CPU.
+ */
+void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
+{
+#ifdef CONFIG_SMP
+ int cpu = stack_smp_processor_id();
+#else
+ int cpu = smp_processor_id();
+#endif
+
+ asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
+ asm volatile("lidt %0" :: "m" (idt_descr));
+}
+#endif
+
+
void pda_init(int cpu)
{
- pgd_t *old_level4 = (pgd_t *)xen_start_info.pt_base;
struct x8664_pda *pda = &cpu_pda[cpu];
/* Setup up data that may be needed in __get_free_pages early */
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
+#ifndef CONFIG_XEN
+ wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
+#else
HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
(unsigned long)(cpu_pda + cpu));
-
+#endif
pda->me = pda;
pda->cpunumber = cpu;
pda->irqcount = -1;
@@ -140,21 +185,14 @@
(unsigned long)stack_thread_info() - PDA_STACKOFFSET +
THREAD_SIZE;
pda->active_mm = &init_mm;
pda->mmu_state = 0;
- pda->kernel_mode = 1;
if (cpu == 0) {
- memcpy((void *)init_level4_pgt,
- (void *) xen_start_info.pt_base, PAGE_SIZE);
+#ifdef CONFIG_XEN
+ xen_init_pt();
+#endif
/* others are initialized in smpboot.c */
pda->pcurrent = &init_task;
pda->irqstackptr = boot_cpu_stack;
- make_page_readonly(init_level4_pgt);
- make_page_readonly(init_level4_user_pgt);
- make_page_readonly(level3_user_pgt); /* for vsyscall stuff */
- xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
- xen_pud_pin(__pa_symbol(level3_user_pgt));
- set_pgd((pgd_t *)(init_level4_user_pgt + 511),
- mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
} else {
pda->irqstackptr = (char *)
__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
@@ -162,20 +200,7 @@
panic("cannot allocate irqstack for cpu %d", cpu);
}
- xen_pt_switch(__pa(init_level4_pgt));
- xen_new_user_pt(__pa(init_level4_user_pgt));
-
- if (cpu == 0) {
- xen_pgd_unpin(__pa(old_level4));
-#if 0
- early_printk("__pa: %x, <machine_phys> old_level 4 %x\n",
- __pa(xen_start_info.pt_base),
- pfn_to_mfn(__pa(old_level4) >> PAGE_SHIFT));
-#endif
-// make_page_writable(old_level4);
-// free_bootmem(__pa(old_level4), PAGE_SIZE);
- }
-
+ switch_pt();
pda->irqstackptr += IRQSTACKSIZE-64;
}
@@ -185,6 +210,18 @@
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
+#ifndef CONFIG_XEN
+ /*
+ * LSTAR and STAR live in a bit strange symbiosis.
+ * They both write to the same internal register. STAR allows to set CS/DS
+ * but only a 32bit target. LSTAR sets the 64bit rip.
+ */
+ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
+ wrmsrl(MSR_LSTAR, system_call);
+
+ /* Flags to clear on syscall */
+ wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
+#endif
#ifdef CONFIG_IA32_EMULATION
syscall32_cpu_init ();
#endif
@@ -197,27 +234,8 @@
rdmsrl(MSR_EFER, efer);
if (!(efer & EFER_NX) || do_not_nx) {
__supported_pte_mask &= ~_PAGE_NX;
-
}
}
-
-void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
-{
- unsigned long frames[16];
- unsigned long va;
- int f;
-
- for (va = gdt_descr->address, f = 0;
- va < gdt_descr->address + gdt_descr->size;
- va += PAGE_SIZE, f++) {
- frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
- make_page_readonly((void *)va);
- }
- if (HYPERVISOR_set_gdt(frames, gdt_descr->size /
- sizeof (struct desc_struct)))
- BUG();
-}
-
/*
* cpu_init() initializes state that is per-CPU. Some data is already
@@ -247,36 +265,32 @@
me = current;
- if (test_and_set_bit(cpu, &cpu_initialized))
+ if (cpu_test_and_set(cpu, cpu_initialized))
panic("CPU#%d already initialized!\n", cpu);
printk("Initializing CPU#%d\n", cpu);
-#if 0
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-#endif
+
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
*/
+#ifndef CONFIG_XEN
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
}
cpu_gdt_descr[cpu].size = GDT_SIZE;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
-#if 0
- asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
- asm volatile("lidt %0" :: "m" (idt_descr));
-#endif
- cpu_gdt_init(&cpu_gdt_descr[cpu]);
-
-#if 0
+
memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
-
-#endif
- memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
+#else
+ memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
GDT_ENTRY_TLS_ENTRIES * 8);
+
+ cpu_gdt_init(&cpu_gdt_descr[cpu]);
+#endif
/*
* Delete NT
@@ -284,12 +298,12 @@
asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");
- if (cpu == 0)
- early_identify_cpu(&boot_cpu_data);
-
syscall_init();
+ wrmsrl(MSR_FS_BASE, 0);
+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
barrier();
+
check_efer();
/*
@@ -321,19 +335,22 @@
BUG();
enter_lazy_tlb(&init_mm, me);
+#ifndef CONFIG_XEN
+ set_tss_desc(cpu, t);
+ load_TR_desc();
+#endif
load_LDT(&init_mm.context);
/*
* Clear all 6 debug registers:
*/
-#define CD(register) HYPERVISOR_set_debugreg(register, 0)
-
- CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-
-#undef CD
+
+ set_debug(0UL, 0);
+ set_debug(0UL, 1);
+ set_debug(0UL, 2);
+ set_debug(0UL, 3);
+ set_debug(0UL, 6);
+ set_debug(0UL, 7);
+
fpu_init();
-
-#ifdef CONFIG_NUMA
- numa_add_cpu(cpu);
-#endif
-}
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c Thu Aug 25 22:53:20 2005
@@ -28,7 +28,12 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apicdef.h>
-
+#ifdef CONFIG_XEN
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+
+#else
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
@@ -44,6 +49,7 @@
static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);
#define FLUSH_ALL -1ULL
+#endif
/*
* We cannot call mmdrop() because we are in interrupt context,
@@ -57,6 +63,7 @@
load_cr3(swapper_pg_dir);
}
+#ifndef CONFIG_XEN
/*
*
* The flush IPI assumes that a thread switch happens in this order:
@@ -250,6 +257,18 @@
{
on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
}
+#else
+/*
+ * CONFIG_XEN replacements for the native IPI-based TLB flush code, which is
+ * compiled out by the #ifndef CONFIG_XEN above.  Each routine forwards to the
+ * matching xen_* helper; smp_invalidate_interrupt() has nothing left to do.
+ */
+asmlinkage void smp_invalidate_interrupt (void)
+{ return; }
+void flush_tlb_current_task(void)
+{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
+void flush_tlb_mm (struct mm_struct * mm)
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+void flush_tlb_all(void)
+{ xen_tlb_flush_all(); }
+#endif /* Xen */
void smp_kdb_stop(void)
{
@@ -310,13 +329,21 @@
/* Wait for response */
while (atomic_read(&data.started) != cpus)
+#ifndef CONFIG_XEN
cpu_relax();
+#else
+ barrier();
+#endif
if (!wait)
return;
while (atomic_read(&data.finished) != cpus)
+#ifndef CONFIG_XEN
cpu_relax();
+#else
+ barrier();
+#endif
}
/*
@@ -350,7 +377,11 @@
*/
cpu_clear(smp_processor_id(), cpu_online_map);
local_irq_disable();
+#ifndef CONFIG_XEN
disable_local_APIC();
+#else
+ xxprint("stop_this_cpu disable_local_APIC\n");
+#endif
local_irq_enable();
}
@@ -364,8 +395,10 @@
void smp_send_stop(void)
{
int nolock = 0;
+#ifndef CONFIG_XEN
if (reboot_force)
return;
+#endif
/* Don't deadlock on the call lock in panic */
if (!spin_trylock(&call_lock)) {
/* ignore locking because we have paniced anyways */
@@ -376,7 +409,11 @@
spin_unlock(&call_lock);
local_irq_disable();
+#ifdef CONFIG_XEN
+ xxprint("stop_this_cpu disable_local_APIC\n");
+#else
disable_local_APIC();
+#endif
local_irq_enable();
}
@@ -385,18 +422,32 @@
* all the work is done automatically when
* we return from the interrupt.
*/
+#ifndef CONFIG_XEN
asmlinkage void smp_reschedule_interrupt(void)
-{
+#else
+asmlinkage irqreturn_t smp_reschedule_interrupt(void)
+#endif
+{
+#ifndef CONFIG_XEN
ack_APIC_irq();
-}
-
+#else
+ return IRQ_HANDLED;
+#endif
+}
+
+#ifndef CONFIG_XEN
asmlinkage void smp_call_function_interrupt(void)
+#else
+asmlinkage irqreturn_t smp_call_function_interrupt(void)
+#endif
{
void (*func) (void *info) = call_data->func;
void *info = call_data->info;
int wait = call_data->wait;
+#ifndef CONFIG_XEN
ack_APIC_irq();
+#endif
/*
* Notify initiating CPU that I've grabbed the data and am
* about to execute the function
@@ -413,10 +464,16 @@
mb();
atomic_inc(&call_data->finished);
}
+#ifdef CONFIG_XEN
+ return IRQ_HANDLED;
+#endif
}
int safe_smp_processor_id(void)
{
+#ifdef CONFIG_XEN
+ return smp_processor_id();
+#else
int apicid, i;
if (disable_apic)
@@ -437,4 +494,5 @@
return 0;
return 0; /* Should not happen */
-}
+#endif
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Thu Aug 25 22:53:20 2005
@@ -47,6 +47,9 @@
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>
+#ifdef CONFIG_XEN
+#include <linux/interrupt.h>
+#endif
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
@@ -57,11 +60,20 @@
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/nmi.h>
+#ifdef CONFIG_XEN
+#include <asm/arch_hooks.h>
+
+#include <asm-xen/evtchn.h>
+#endif
/* Change for real CPU hotplug. Note other files need to be fixed
first too. */
#define __cpuinit __init
#define __cpuinitdata __initdata
+
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+ unsigned int maxcpus = NR_CPUS;
+#endif
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
@@ -96,6 +108,7 @@
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);
+#ifndef CONFIG_XEN
/*
* Trampoline 80x86 program as an array.
*/
@@ -115,6 +128,7 @@
memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(tramp);
}
+#endif
/*
* The bootstrap kernel entry code has set these up. Save them for
@@ -130,6 +144,7 @@
print_cpu_info(c);
}
+#ifndef CONFIG_XEN
/*
* New Funky TSC sync algorithm borrowed from IA64.
* Main advantage is that it doesn't reset the TSCs fully and
@@ -331,6 +346,7 @@
return 0;
}
__setup("notscsync", notscsync_setup);
+#endif
static atomic_t init_deasserted __cpuinitdata;
@@ -343,6 +359,7 @@
int cpuid, phys_id;
unsigned long timeout;
+#ifndef CONFIG_XEN
/*
* If waken up by an INIT in an 82489DX configuration
* we may get here before an INIT-deassert IPI reaches
@@ -352,10 +369,15 @@
while (!atomic_read(&init_deasserted))
cpu_relax();
+#endif
/*
* (This works even if the APIC is not enabled.)
*/
+#ifndef CONFIG_XEN
phys_id = GET_APIC_ID(apic_read(APIC_ID));
+#else
+ phys_id = smp_processor_id();
+#endif
cpuid = smp_processor_id();
if (cpu_isset(cpuid, cpu_callin_map)) {
panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
@@ -389,6 +411,7 @@
cpuid);
}
+#ifndef CONFIG_XEN
/*
* the boot CPU has finished the init stage and is spinning
* on callin_map until we finish. We are free to set up this
@@ -398,6 +421,7 @@
Dprintk("CALLIN, before setup_local_APIC().\n");
setup_local_APIC();
+#endif
/*
* Get our bogomips.
@@ -405,7 +429,9 @@
calibrate_delay();
Dprintk("Stack at about %p\n",&cpuid);
+#ifndef CONFIG_XEN
disable_APIC_timer();
+#endif
/*
* Save our processor parameters
@@ -417,6 +443,29 @@
*/
cpu_set(cpuid, cpu_callin_map);
}
+
+#ifdef CONFIG_XEN
+/*
+ * Handler installed by ldebug_setup() for the VIRQ_DEBUG IRQ.  It ignores
+ * all of its arguments and only reports the interrupt as handled.
+ */
+static irqreturn_t ldebug_interrupt(
+ int irq, void *dev_id, struct pt_regs *regs)
+{
+ return IRQ_HANDLED;
+}
+
+static DEFINE_PER_CPU(int, ldebug_irq);
+static char ldebug_name[NR_CPUS][15];
+
+/*
+ * Bind VIRQ_DEBUG to an IRQ for the calling CPU, remember it in the
+ * per-CPU ldebug_irq slot and attach ldebug_interrupt() to it under the
+ * name "ldebug<cpu>".  A failing request_irq() is fatal.
+ */
+void ldebug_setup(void)
+{
+ int this_cpu = smp_processor_id();
+ char *irq_name = ldebug_name[this_cpu];
+ int rc;
+
+ per_cpu(ldebug_irq, this_cpu) = bind_virq_to_irq(VIRQ_DEBUG);
+ sprintf(irq_name, "ldebug%d", this_cpu);
+
+ rc = request_irq(per_cpu(ldebug_irq, this_cpu), ldebug_interrupt,
+ SA_INTERRUPT, irq_name, NULL);
+ BUG_ON(rc);
+}
+
+extern void local_setup_timer(void);
+#endif
/*
* Setup code on secondary processor (after comming out of the trampoline)
@@ -434,6 +483,7 @@
/* otherwise gcc will move up the smp_processor_id before the cpu_init */
barrier();
+#ifndef CONFIG_XEN
Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
setup_secondary_APIC_clock();
@@ -446,6 +496,12 @@
}
enable_APIC_timer();
+#else
+ local_setup_timer();
+ ldebug_setup();
+ smp_intr_init();
+ local_irq_enable();
+#endif
/*
* Allow the master to continue.
@@ -453,10 +509,12 @@
cpu_set(smp_processor_id(), cpu_online_map);
mb();
+#ifndef CONFIG_XEN
/* Wait for TSC sync to not schedule things before.
We still process interrupts, which could see an inconsistent
time in that window unfortunately. */
tsc_sync_wait();
+#endif
cpu_idle();
}
@@ -464,6 +522,7 @@
extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);
+#ifndef CONFIG_XEN
#if APIC_DEBUG
static void inquire_remote_apic(int apicid)
{
@@ -627,6 +686,7 @@
return (send_status | accept_status);
}
+#endif
/*
* Boot one CPU.
@@ -637,6 +697,14 @@
unsigned long boot_error;
int timeout;
unsigned long start_rip;
+#ifdef CONFIG_XEN
+ vcpu_guest_context_t ctxt;
+ extern void startup_64_smp(void);
+ extern void hypervisor_callback(void);
+ extern void failsafe_callback(void);
+ extern void smp_trap_init(trap_info_t *);
+ int i;
+#endif
/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
@@ -649,7 +717,11 @@
cpu_pda[cpu].pcurrent = idle;
+#ifndef CONFIG_XEN
start_rip = setup_trampoline();
+#else
+ start_rip = (unsigned long)startup_64_smp;
+#endif
init_rsp = idle->thread.rsp;
per_cpu(init_tss,cpu).rsp0 = init_rsp;
@@ -666,6 +738,95 @@
atomic_set(&init_deasserted, 0);
+#ifdef CONFIG_XEN
+ cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL);
+ BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
+ cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+ memcpy((void *)cpu_gdt_descr[cpu].address,
+ (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+
+ memset(&ctxt, 0, sizeof(ctxt));
+
+ ctxt.flags = VGCF_IN_KERNEL;
+ ctxt.user_regs.ds = __USER_DS;
+ ctxt.user_regs.es = __USER_DS;
+ ctxt.user_regs.fs = 0;
+ ctxt.user_regs.gs = 0;
+ ctxt.user_regs.ss = __KERNEL_DS|0x3;
+ ctxt.user_regs.cs = __KERNEL_CS|0x3;
+ ctxt.user_regs.rip = start_rip;
+ ctxt.user_regs.rsp = idle->thread.rsp;
+#define X86_EFLAGS_IOPL_RING3 0x3000
+ ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING3;
+
+ /* FPU is set up to default initial state. */
+ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+ /* Virtual IDT is empty at start-of-day. */
+ for ( i = 0; i < 256; i++ )
+ {
+ ctxt.trap_ctxt[i].vector = i;
+ ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
+ }
+ smp_trap_init(ctxt.trap_ctxt);
+
+ /* No LDT. */
+ ctxt.ldt_ents = 0;
+
+ {
+ unsigned long va;
+ int f;
+
+ for (va = cpu_gdt_descr[cpu].address, f = 0;
+ va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+ va += PAGE_SIZE, f++) {
+ ctxt.gdt_frames[f] = virt_to_mfn(va);
+ make_page_readonly((void *)va);
+ }
+ ctxt.gdt_ents = GDT_ENTRIES;
+ }
+
+ /* Ring 1 stack is the initial stack. */
+ ctxt.kernel_ss = __KERNEL_DS;
+ ctxt.kernel_sp = idle->thread.rsp;
+
+ /* Callback handlers. */
+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+ ctxt.syscall_callback_eip = (unsigned long)system_call;
+
+ ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
+
+ boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+ if (boot_error)
+ printk("boot error: %ld\n", boot_error);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+ }
+ }
+ x86_cpu_to_apicid[cpu] = apicid;
+#else
Dprintk("Setting warm reset code and vector.\n");
CMOS_WRITE(0xa, 0xf);
@@ -729,6 +890,7 @@
#endif
}
}
+#endif
if (boot_error) {
cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
@@ -790,6 +952,7 @@
}
}
+#ifndef CONFIG_XEN
/*
* Cleanup possible dangling ends...
*/
@@ -817,6 +980,7 @@
free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
#endif
}
+#endif
/*
* Fall back to non SMP mode after errors.
@@ -827,10 +991,12 @@
{
cpu_present_map = cpumask_of_cpu(0);
cpu_possible_map = cpumask_of_cpu(0);
+#ifndef CONFIG_XEN
if (smp_found_config)
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
else
phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
cpu_set(0, cpu_sibling_map[0]);
cpu_set(0, cpu_core_map[0]);
}
@@ -857,6 +1023,7 @@
*/
static int __cpuinit smp_sanity_check(unsigned max_cpus)
{
+#ifndef CONFIG_XEN
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
hard_smp_processor_id());
@@ -896,13 +1063,19 @@
nr_ioapics = 0;
return -1;
}
+#endif
/*
* If SMP should be disabled, then really disable it!
*/
if (!max_cpus) {
+#ifdef CONFIG_XEN
+ HYPERVISOR_shared_info->n_vcpu = 1;
+#endif
printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+#ifndef CONFIG_XEN
nr_ioapics = 0;
+#endif
return -1;
}
@@ -917,7 +1090,10 @@
{
int i;
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+#else
nmi_watchdog_default();
+#endif
current_cpu_data = boot_cpu_data;
current_thread_info()->cpu = 0; /* needed? */
@@ -927,8 +1103,12 @@
* Fill in cpu_present_mask
*/
for (i = 0; i < NR_CPUS; i++) {
+#ifndef CONFIG_XEN
int apicid = cpu_present_to_apicid(i);
if (physid_isset(apicid, phys_cpu_present_map)) {
+#else
+ if (i < HYPERVISOR_shared_info->n_vcpu) {
+#endif
cpu_set(i, cpu_present_map);
/* possible map would be different if we supported real CPU hotplug. */
@@ -942,6 +1122,9 @@
return;
}
+#ifdef CONFIG_XEN
+ smp_intr_init();
+#else
/*
* Switch from PIC to APIC mode.
@@ -954,20 +1137,26 @@
GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
/* Or can we switch back to PIC here? */
}
+#endif
/*
* Now start the IO-APICs
*/
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+#else
if (!skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
else
nr_ioapics = 0;
+#endif
/*
* Set up local APIC timer on boot CPU.
*/
+#ifndef CONFIG_XEN
setup_boot_APIC_clock();
+#endif
}
/*
@@ -989,17 +1178,23 @@
int __cpuinit __cpu_up(unsigned int cpu)
{
int err;
+#ifndef CONFIG_XEN
int apicid = cpu_present_to_apicid(cpu);
+#else
+ int apicid = cpu;
+#endif
WARN_ON(irqs_disabled());
Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
+#ifndef CONFIG_XEN
if (apicid == BAD_APICID || apicid == boot_cpu_id ||
!physid_isset(apicid, phys_cpu_present_map)) {
printk("__cpu_up: bad cpu %d\n", cpu);
return -EINVAL;
}
+#endif
/* Boot it! */
err = do_boot_cpu(cpu, apicid);
@@ -1021,15 +1216,82 @@
*/
void __cpuinit smp_cpus_done(unsigned int max_cpus)
{
+#ifndef CONFIG_XEN
zap_low_mappings();
smp_cleanup_boot();
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
#endif
+#endif
detect_siblings();
+#ifndef CONFIG_XEN
time_init_gtod();
check_nmi_watchdog();
-}
+#endif
+}
+
+#ifdef CONFIG_XEN
+extern int bind_ipi_to_irq(int ipi);
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
+
+/*
+ * Set up the two IPI interrupts for the calling CPU: bind
+ * RESCHEDULE_VECTOR and CALL_FUNCTION_VECTOR to IRQs, store them in the
+ * per-CPU resched_irq / callfunc_irq slots and attach the handlers under
+ * the names "resched<cpu>" and "callfunc<cpu>".  A failing request_irq()
+ * is fatal.
+ */
+void smp_intr_init(void)
+{
+ int this_cpu = smp_processor_id();
+ char *rname = resched_name[this_cpu];
+ char *cname = callfunc_name[this_cpu];
+ int rc;
+
+ /* Reschedule IPI. */
+ per_cpu(resched_irq, this_cpu) = bind_ipi_to_irq(RESCHEDULE_VECTOR);
+ sprintf(rname, "resched%d", this_cpu);
+ rc = request_irq(per_cpu(resched_irq, this_cpu),
+ smp_reschedule_interrupt, SA_INTERRUPT, rname, NULL);
+ BUG_ON(rc);
+
+ /* Call-function IPI. */
+ per_cpu(callfunc_irq, this_cpu) = bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
+ sprintf(cname, "callfunc%d", this_cpu);
+ rc = request_irq(per_cpu(callfunc_irq, this_cpu),
+ smp_call_function_interrupt, SA_INTERRUPT, cname, NULL);
+ BUG_ON(rc);
+}
+
+/*
+ * Counterpart of smp_intr_init() for the calling CPU: free the IRQs held
+ * in the per-CPU resched_irq and callfunc_irq slots and unbind the
+ * RESCHEDULE_VECTOR and CALL_FUNCTION_VECTOR IPIs.
+ */
+static void smp_intr_exit(void)
+{
+ int cpu = smp_processor_id();
+
+ free_irq(per_cpu(resched_irq, cpu), NULL);
+ unbind_ipi_from_irq(RESCHEDULE_VECTOR);
+
+ free_irq(per_cpu(callfunc_irq, cpu), NULL);
+ unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
+}
+
+extern void local_setup_timer_irq(void);
+extern void local_teardown_timer_irq(void);
+
+/*
+ * Suspend hook: calls local_teardown_timer_irq() and then smp_intr_exit().
+ * As the XXX note below says, doing this on all CPUs is still outstanding.
+ */
+void smp_suspend(void)
+{
+ /* XXX todo: take down time and ipi's on all cpus */
+ local_teardown_timer_irq();
+ smp_intr_exit();
+}
+
+/*
+ * Resume hook: calls smp_intr_init() and then local_setup_timer_irq(),
+ * i.e. the reverse order of smp_suspend().  As the XXX note below says,
+ * doing this on all CPUs is still outstanding.
+ */
+void smp_resume(void)
+{
+ /* XXX todo: restore time and ipi's on all cpus */
+ smp_intr_init();
+ local_setup_timer_irq();
+}
+
+/* Placeholder: intentionally empty until the VCPU restore path is written. */
+void _restore_vcpu(void)
+{
+ /* XXX need to write this */
+}
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c Thu Aug 25 22:53:20 2005
@@ -953,6 +953,17 @@
cpu_init();
}
+/*
+ * Copy every trap_table entry (flags, cs, address) into the caller's
+ * trap context array at the index given by the entry's vector.  The walk
+ * stops at the first table entry whose address is zero.
+ */
+void smp_trap_init(trap_info_t *trap_ctxt)
+{
+ trap_info_t *entry;
+
+ for (entry = trap_table; entry->address; entry++) {
+ trap_info_t *slot = &trap_ctxt[entry->vector];
+
+ slot->flags = entry->flags;
+ slot->cs = entry->cs;
+ slot->address = entry->address;
+ }
+}
+
/* Actual parsing is done early in setup.c. */
static int __init oops_dummy(char *s)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c Thu Aug 25 22:53:20 2005
@@ -210,15 +210,16 @@
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
}
-extern void __set_fixmap_user (enum fixed_addresses, unsigned long, pgprot_t);
-
+#ifdef CONFIG_XEN
static void __init map_vsyscall_user(void)
{
+ extern void __set_fixmap_user(enum fixed_addresses, unsigned long, pgprot_t);
extern char __vsyscall_0;
unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
__set_fixmap_user(VSYSCALL_FIRST_PAGE, physaddr_page0,
PAGE_KERNEL_VSYSCALL);
}
+#endif
static int __init vsyscall_init(void)
{
@@ -227,7 +228,10 @@
BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
map_vsyscall();
- map_vsyscall_user(); /* establish tranlation for user address space */
+#ifdef CONFIG_XEN
+ map_vsyscall_user();
+ sysctl_vsyscall = 0; /* disable vgettimeofday() */
+#endif
#ifdef CONFIG_SYSCTL
register_sysctl_table(kernel_root_table2, 0);
#endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S Thu Aug 25 22:53:20 2005
@@ -8,11 +8,14 @@
#define sizeof_vcpu_shift 3
#ifdef CONFIG_SMP
-#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
-#define preempt_enable(reg) decl threadinfo_preempt_count(reg)
+//#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
+//#define preempt_enable(reg) decl threadinfo_preempt_count(reg)
+#define preempt_disable(reg)
+#define preempt_enable(reg)
#define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp) ; \
movq %gs:pda_cpunumber,reg ; \
- shl $sizeof_vcpu_shift,reg ; \
+ shl $32, reg ; \
+ shr $32-sizeof_vcpu_shift,reg ; \
addq HYPERVISOR_shared_info,reg
#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp) ; \
#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile Thu Aug 25 22:53:20 2005
@@ -6,10 +6,10 @@
CFLAGS += -Iarch/$(XENARCH)/mm
-obj-y := init.o fault.o ioremap.o pageattr.o
+obj-y := init.o fault.o pageattr.o
c-obj-y := extable.o
-i386-obj-y := hypervisor.o
+i386-obj-y := hypervisor.o ioremap.o
#obj-y := init.o fault.o ioremap.o extable.o pageattr.o
#c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c Thu Aug 25 22:53:20 2005
@@ -250,7 +250,11 @@
happen within a race in page table update. In the later
case just flush. */
- pgd = pgd_offset(current->mm ?: &init_mm, address);
+ /* On Xen the line below does not always work. Needs investigating! */
+ /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
+ pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
+ pgd += pgd_index(address);
+
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
return -1;
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Thu Aug 25 22:53:20 2005
@@ -40,12 +40,14 @@
#include <asm/proto.h>
#include <asm/smp.h>
+extern unsigned long *contiguous_bitmap;
+
+#if defined(CONFIG_SWIOTLB)
+extern void swiotlb_init(void);
+#endif
+
#ifndef Dprintk
#define Dprintk(x...)
-#endif
-
-#ifdef CONFIG_GART_IOMMU
-extern int swiotlb;
#endif
extern char _stext[];
@@ -280,7 +282,7 @@
if (!pte_none(*pte) &&
pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
pte_ERROR(*pte);
- xen_l1_entry_update(pte, new_pte);
+ set_pte(pte, new_pte);
/*
* It's enough to flush this one mapping.
@@ -439,6 +441,31 @@
*dst = val;
}
+static inline int make_readonly(unsigned long paddr)
+{
+ int readonly = 0;
+
+ /* Make new page tables read-only. */
+ if ((paddr < ((table_start << PAGE_SHIFT) + tables_space)) &&
+ (paddr >= (table_start << PAGE_SHIFT)))
+ readonly = 1;
+
+ /* Make old page tables read-only. */
+ if ((paddr < ((xen_start_info.pt_base - __START_KERNEL_map) +
+ (xen_start_info.nr_pt_frames << PAGE_SHIFT))) &&
+ (paddr >= (xen_start_info.pt_base - __START_KERNEL_map)))
+ readonly = 1;
+
+ /*
+ * No need for writable mapping of kernel image. This also ensures that
+ * page and descriptor tables embedded inside don't have writable mappings.
+ */
+ if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
+ readonly = 1;
+
+ return readonly;
+}
+
void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
long i, j, k;
@@ -475,9 +502,7 @@
pte = alloc_low_page(&pte_phys);
pte_save = pte;
for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr +=
PTE_SIZE) {
- if (paddr < (table_start << PAGE_SHIFT)
- + tables_space)
- {
+ if (make_readonly(paddr)) {
__set_pte(pte,
__pte(paddr | (_KERNPG_TABLE &
~_PAGE_RW)));
continue;
@@ -511,75 +536,106 @@
round_up(ptes * 8, PAGE_SIZE);
}
+void __init xen_init_pt(void)
+{
+ unsigned long addr, *page;
+ int i;
+
+ for (i = 0; i < NR_CPUS; i++)
+ per_cpu(cur_pgd, i) = init_mm.pgd;
+
+ memset((void *)init_level4_pgt, 0, PAGE_SIZE);
+ memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
+ memset((void *)level2_kernel_pgt, 0, PAGE_SIZE);
+
+ /* Find the initial pte page that was built for us. */
+ page = (unsigned long *)xen_start_info.pt_base;
+ addr = page[pgd_index(__START_KERNEL_map)];
+ addr_to_page(addr, page);
+ addr = page[pud_index(__START_KERNEL_map)];
+ addr_to_page(addr, page);
+
+ /* Construct mapping of initial pte page in our own directories. */
+ init_level4_pgt[pgd_index(__START_KERNEL_map)] =
+ mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
+ level3_kernel_pgt[pud_index(__START_KERNEL_map)] =
+ __pud(__pa_symbol(level2_kernel_pgt) |
+ _KERNPG_TABLE | _PAGE_USER);
+ memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
+
+ make_page_readonly(init_level4_pgt);
+ make_page_readonly(init_level4_user_pgt);
+ make_page_readonly(level3_kernel_pgt);
+ make_page_readonly(level3_user_pgt);
+ make_page_readonly(level2_kernel_pgt);
+
+ xen_pgd_pin(__pa_symbol(init_level4_pgt));
+ xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
+ xen_pud_pin(__pa_symbol(level3_kernel_pgt));
+ xen_pud_pin(__pa_symbol(level3_user_pgt));
+ xen_pmd_pin(__pa_symbol(level2_kernel_pgt));
+
+ set_pgd((pgd_t *)(init_level4_user_pgt + 511),
+ mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
+}
/*
* Extend kernel mapping to access pages for page tables. The initial
* mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
* mapping for early initialization.
*/
-
-#define MIN_INIT_SIZE 0x800000
static unsigned long current_size, extended_size;
void __init extend_init_mapping(void)
{
unsigned long va = __START_KERNEL_map;
- unsigned long addr, *pte_page;
-
- unsigned long phys;
+ unsigned long phys, addr, *pte_page;
pmd_t *pmd;
pte_t *pte, new_pte;
unsigned long *page = (unsigned long *) init_level4_pgt;
int i;
- addr = (unsigned long) page[pgd_index(va)];
+ addr = page[pgd_index(va)];
addr_to_page(addr, page);
-
addr = page[pud_index(va)];
addr_to_page(addr, page);
for (;;) {
+ pmd = (pmd_t *)&page[pmd_index(va)];
+ if (!pmd_present(*pmd))
+ break;
+ addr = page[pmd_index(va)];
+ addr_to_page(addr, pte_page);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte = (pte_t *) &pte_page[pte_index(va)];
+ if (!pte_present(*pte))
+ break;
+ va += PAGE_SIZE;
+ current_size += PAGE_SIZE;
+ }
+ }
+
+ while (va < __START_KERNEL_map + current_size + tables_space) {
pmd = (pmd_t *) &page[pmd_index(va)];
- if (pmd_present(*pmd)) {
- /*
- * if pmd is valid, check pte.
- */
- addr = page[pmd_index(va)];
- addr_to_page(addr, pte_page);
-
- for (i = 0; i < PTRS_PER_PTE; i++) {
- pte = (pte_t *) &pte_page[pte_index(va)];
-
- if (pte_present(*pte)) {
- va += PAGE_SIZE;
- current_size += PAGE_SIZE;
- } else
- break;
- }
-
- } else
- break;
- }
-
- for (; va < __START_KERNEL_map + current_size + tables_space; ) {
- pmd = (pmd_t *) &page[pmd_index(va)];
-
- if (pmd_none(*pmd)) {
- pte_page = (unsigned long *) alloc_static_page(&phys);
- make_page_readonly(pte_page);
- xen_pte_pin(phys);
- set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
-
- for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
- new_pte = pfn_pte((va - __START_KERNEL_map) >>
PAGE_SHIFT,
- __pgprot(_KERNPG_TABLE |
_PAGE_USER));
-
- pte = (pte_t *) &pte_page[pte_index(va)];
- xen_l1_entry_update(pte, new_pte);
- extended_size += PAGE_SIZE;
- }
- }
- }
+ if (!pmd_none(*pmd))
+ continue;
+ pte_page = (unsigned long *) alloc_static_page(&phys);
+ make_page_readonly(pte_page);
+ xen_pte_pin(phys);
+ set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
+ for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
+ new_pte = pfn_pte(
+ (va - __START_KERNEL_map) >> PAGE_SHIFT,
+ __pgprot(_KERNPG_TABLE | _PAGE_USER));
+ pte = (pte_t *)&pte_page[pte_index(va)];
+ xen_l1_entry_update(pte, new_pte);
+ extended_size += PAGE_SIZE;
+ }
+ }
+
+ /* Kill mapping of low 1MB. */
+ for (va = __START_KERNEL_map; va < (unsigned long)&_text; va +=
PAGE_SIZE)
+ HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
}
@@ -620,10 +676,6 @@
start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);
- /*
- * TBD: Need to calculate at runtime
- */
-
__flush_tlb_all();
init_mapping_done = 1;
}
@@ -670,7 +722,7 @@
set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
else
__set_fixmap(FIX_ISAMAP_BEGIN - i,
- virt_to_machine(empty_zero_page),
+ virt_to_mfn(empty_zero_page) <<
PAGE_SHIFT,
PAGE_KERNEL_RO);
}
#endif
@@ -720,8 +772,6 @@
return 1;
}
-extern int swiotlb_force;
-
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
kcore_vsyscall;
@@ -730,14 +780,13 @@
int codesize, reservedpages, datasize, initsize;
int tmp;
-#ifdef CONFIG_SWIOTLB
- if (swiotlb_force)
- swiotlb = 1;
- if (!iommu_aperture &&
- (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu))
- swiotlb = 1;
- if (swiotlb)
- swiotlb_init();
+ contiguous_bitmap = alloc_bootmem_low_pages(
+ (end_pfn + 2*BITS_PER_LONG) >> 3);
+ BUG_ON(!contiguous_bitmap);
+ memset(contiguous_bitmap, 0, (end_pfn + 2*BITS_PER_LONG) >> 3);
+
+#if defined(CONFIG_SWIOTLB)
+ swiotlb_init();
#endif
/* How many end-of-memory variables you have, grandma! */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile Thu Aug 25 22:53:20 2005
@@ -30,8 +30,9 @@
$(patsubst %.o,$(obj)/%.c,$(c-i386-obj-y)):
@ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
-obj-y += $(c-i386-obj-y) $(c-obj-y)
-obj-y += $(c-xen-obj-y)
+# Make sure irq.o gets linked in before common.o
+obj-y += $(patsubst common.o,$(c-xen-obj-y) common.o,$(c-i386-obj-y))
+obj-y += $(c-obj-y)
clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
clean-files += $(patsubst %.o,%.c,$(c-i386-obj-y) $(c-i386-obj-))
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Thu Aug 25
22:53:20 2005
@@ -81,20 +81,17 @@
static DECLARE_WORK(balloon_worker, balloon_process, NULL);
static struct timer_list balloon_timer;
-/* Flag for dom0 xenstore workaround */
-static int balloon_xenbus_init=0;
-
-/* Init Function */
-void balloon_init_watcher(void);
-
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
/* Use the private and mapping fields of struct page as a list. */
-#define PAGE_TO_LIST(p) ( (struct list_head *)&p->private )
-#define LIST_TO_PAGE(l) ( list_entry( ((unsigned long *)l), \
- struct page, private ) )
-#define UNLIST_PAGE(p) do { list_del(PAGE_TO_LIST(p)); \
- p->mapping = NULL; \
- p->private = 0; } while(0)
+#define PAGE_TO_LIST(p) ((struct list_head *)&p->private)
+#define LIST_TO_PAGE(l) \
+ (list_entry(((unsigned long *)l), struct page, private))
+#define UNLIST_PAGE(p) \
+ do { \
+ list_del(PAGE_TO_LIST(p)); \
+ p->mapping = NULL; \
+ p->private = 0; \
+ } while(0)
#else
/* There's a dedicated list field in struct page we can use. */
#define PAGE_TO_LIST(p) ( &p->list )
@@ -110,56 +107,53 @@
#endif
#define IPRINTK(fmt, args...) \
- printk(KERN_INFO "xen_mem: " fmt, ##args)
+ printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
- printk(KERN_WARNING "xen_mem: " fmt, ##args)
+ printk(KERN_WARNING "xen_mem: " fmt, ##args)
/* balloon_append: add the given page to the balloon. */
static void balloon_append(struct page *page)
{
- /* Low memory is re-populated first, so highmem pages go at list tail. */
- if ( PageHighMem(page) )
- {
- list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
- balloon_high++;
- }
- else
- {
- list_add(PAGE_TO_LIST(page), &ballooned_pages);
- balloon_low++;
- }
+ /* Lowmem is re-populated first, so highmem pages go at list tail. */
+ if (PageHighMem(page)) {
+ list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
+ balloon_high++;
+ } else {
+ list_add(PAGE_TO_LIST(page), &ballooned_pages);
+ balloon_low++;
+ }
}
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(void)
{
- struct page *page;
-
- if ( list_empty(&ballooned_pages) )
- return NULL;
-
- page = LIST_TO_PAGE(ballooned_pages.next);
- UNLIST_PAGE(page);
-
- if ( PageHighMem(page) )
- balloon_high--;
- else
- balloon_low--;
-
- return page;
+ struct page *page;
+
+ if (list_empty(&ballooned_pages))
+ return NULL;
+
+ page = LIST_TO_PAGE(ballooned_pages.next);
+ UNLIST_PAGE(page);
+
+ if (PageHighMem(page))
+ balloon_high--;
+ else
+ balloon_low--;
+
+ return page;
}
static void balloon_alarm(unsigned long unused)
{
- schedule_work(&balloon_worker);
+ schedule_work(&balloon_worker);
}
static unsigned long current_target(void)
{
- unsigned long target = min(target_pages, hard_limit);
- if ( target > (current_pages + balloon_low + balloon_high) )
- target = current_pages + balloon_low + balloon_high;
- return target;
+ unsigned long target = min(target_pages, hard_limit);
+ if (target > (current_pages + balloon_low + balloon_high))
+ target = current_pages + balloon_low + balloon_high;
+ return target;
}
/*
@@ -170,353 +164,336 @@
*/
static void balloon_process(void *unused)
{
- unsigned long *mfn_list, pfn, i, flags;
- struct page *page;
- long credit, debt, rc;
- void *v;
-
- down(&balloon_mutex);
+ unsigned long *mfn_list, pfn, i, flags;
+ struct page *page;
+ long credit, debt, rc;
+ void *v;
+
+ down(&balloon_mutex);
retry:
- mfn_list = NULL;
-
- if ( (credit = current_target() - current_pages) > 0 )
- {
- mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list));
- if ( mfn_list == NULL )
- goto out;
-
- balloon_lock(flags);
- rc = HYPERVISOR_dom_mem_op(
- MEMOP_increase_reservation, mfn_list, credit, 0);
- balloon_unlock(flags);
- if ( rc < credit )
- {
- /* We hit the Xen hard limit: reprobe. */
- if ( HYPERVISOR_dom_mem_op(
- MEMOP_decrease_reservation, mfn_list, rc, 0) != rc )
- BUG();
- hard_limit = current_pages + rc - driver_pages;
- vfree(mfn_list);
- goto retry;
- }
-
- for ( i = 0; i < credit; i++ )
- {
- if ( (page = balloon_retrieve()) == NULL )
- BUG();
-
- pfn = page - mem_map;
- if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
- BUG();
-
- /* Update P->M and M->P tables. */
- phys_to_machine_mapping[pfn] = mfn_list[i];
- xen_machphys_update(mfn_list[i], pfn);
+ mfn_list = NULL;
+
+ if ((credit = current_target() - current_pages) > 0) {
+ mfn_list = vmalloc(credit * sizeof(*mfn_list));
+ if (mfn_list == NULL)
+ goto out;
+
+ balloon_lock(flags);
+ rc = HYPERVISOR_dom_mem_op(
+ MEMOP_increase_reservation, mfn_list, credit, 0);
+ balloon_unlock(flags);
+ if (rc < credit) {
+ /* We hit the Xen hard limit: reprobe. */
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_decrease_reservation,
+ mfn_list, rc, 0) != rc);
+ hard_limit = current_pages + rc - driver_pages;
+ vfree(mfn_list);
+ goto retry;
+ }
+
+ for (i = 0; i < credit; i++) {
+ page = balloon_retrieve();
+ BUG_ON(page == NULL);
+
+ pfn = page - mem_map;
+ if (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
+ BUG();
+
+ /* Update P->M and M->P tables. */
+ phys_to_machine_mapping[pfn] = mfn_list[i];
+ xen_machphys_update(mfn_list[i], pfn);
- /* Link back into the page tables if it's not a highmem page. */
- if ( pfn < max_low_pfn )
- {
- HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma((mfn_list[i] << PAGE_SHIFT) |
- pgprot_val(PAGE_KERNEL)),
- 0);
- }
-
- /* Finally, relinquish the memory back to the system allocator. */
- ClearPageReserved(page);
- set_page_count(page, 1);
- __free_page(page);
- }
-
- current_pages += credit;
- }
- else if ( credit < 0 )
- {
- debt = -credit;
-
- mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list));
- if ( mfn_list == NULL )
- goto out;
-
- for ( i = 0; i < debt; i++ )
- {
- if ( (page = alloc_page(GFP_HIGHUSER)) == NULL )
- {
- debt = i;
- break;
- }
-
- pfn = page - mem_map;
- mfn_list[i] = phys_to_machine_mapping[pfn];
-
- if ( !PageHighMem(page) )
- {
- v = phys_to_virt(pfn << PAGE_SHIFT);
- scrub_pages(v, 1);
- HYPERVISOR_update_va_mapping(
- (unsigned long)v, __pte_ma(0), 0);
- }
+ /* Link back into the page tables if not highmem. */
+ if (pfn < max_low_pfn)
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ pfn_pte_ma(mfn_list[i], PAGE_KERNEL),
+ 0));
+
+ /* Relinquish the page back to the allocator. */
+ ClearPageReserved(page);
+ set_page_count(page, 1);
+ __free_page(page);
+ }
+
+ current_pages += credit;
+ } else if (credit < 0) {
+ debt = -credit;
+
+ mfn_list = vmalloc(debt * sizeof(*mfn_list));
+ if (mfn_list == NULL)
+ goto out;
+
+ for (i = 0; i < debt; i++) {
+ if ((page = alloc_page(GFP_HIGHUSER)) == NULL) {
+ debt = i;
+ break;
+ }
+
+ pfn = page - mem_map;
+ mfn_list[i] = phys_to_machine_mapping[pfn];
+
+ if (!PageHighMem(page)) {
+ v = phys_to_virt(pfn << PAGE_SHIFT);
+ scrub_pages(v, 1);
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)v, __pte_ma(0), 0));
+ }
#ifdef CONFIG_XEN_SCRUB_PAGES
- else
- {
- v = kmap(page);
- scrub_pages(v, 1);
- kunmap(page);
- }
+ else {
+ v = kmap(page);
+ scrub_pages(v, 1);
+ kunmap(page);
+ }
#endif
- }
-
- /* Ensure that ballooned highmem pages don't have cached mappings. */
- kmap_flush_unused();
- flush_tlb_all();
-
- /* No more mappings: invalidate pages in P2M and add to balloon. */
- for ( i = 0; i < debt; i++ )
- {
- pfn = mfn_to_pfn(mfn_list[i]);
- phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
- balloon_append(pfn_to_page(pfn));
- }
-
- if ( HYPERVISOR_dom_mem_op(
- MEMOP_decrease_reservation, mfn_list, debt, 0) != debt )
- BUG();
-
- current_pages -= debt;
- }
+ }
+
+ /* Ensure that ballooned highmem pages don't have kmaps. */
+ kmap_flush_unused();
+ flush_tlb_all();
+
+ /* No more mappings: invalidate P2M and add to balloon. */
+ for (i = 0; i < debt; i++) {
+ pfn = mfn_to_pfn(mfn_list[i]);
+ phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+ balloon_append(pfn_to_page(pfn));
+ }
+
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_decrease_reservation,mfn_list, debt, 0) != debt);
+
+ current_pages -= debt;
+ }
out:
- if ( mfn_list != NULL )
- vfree(mfn_list);
-
- /* Schedule more work if there is some still to be done. */
- if ( current_target() != current_pages )
- mod_timer(&balloon_timer, jiffies + HZ);
-
- up(&balloon_mutex);
+ if (mfn_list != NULL)
+ vfree(mfn_list);
+
+ /* Schedule more work if there is some still to be done. */
+ if (current_target() != current_pages)
+ mod_timer(&balloon_timer, jiffies + HZ);
+
+ up(&balloon_mutex);
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
static void set_new_target(unsigned long target)
{
- /* No need for lock. Not read-modify-write updates. */
- hard_limit = ~0UL;
- target_pages = target;
- schedule_work(&balloon_worker);
-}
-
-static struct xenbus_watch xb_watch =
-{
- .node = "memory"
-};
-
-/* FIXME: This is part of a dom0 sequencing workaround */
-static struct xenbus_watch root_watch =
-{
- .node = "/"
+ /* No need for lock. Not read-modify-write updates. */
+ hard_limit = ~0UL;
+ target_pages = target;
+ schedule_work(&balloon_worker);
+}
+
+static struct xenbus_watch target_watch =
+{
+ .node = "memory/target"
};
/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch, const char *node)
{
- unsigned long new_target;
- int err;
-
- if(watch == &root_watch)
- {
- /* FIXME: This is part of a dom0 sequencing workaround */
- if(register_xenbus_watch(&xb_watch) == 0)
- {
- /*
- We successfully set a watch on memory/target:
- now we can stop watching root
- */
- unregister_xenbus_watch(&root_watch);
- balloon_xenbus_init=1;
- }
- else
- {
- return;
- }
- }
-
- err = xenbus_scanf("memory", "target", "%lu", &new_target);
+ unsigned long new_target;
+ int err;
+
+ err = xenbus_scanf("memory", "target", "%lu", &new_target);
+ if (err != 1) {
+ printk(KERN_ERR "Unable to read memory/target\n");
+ return;
+ }
- if(err != 1)
- {
- IPRINTK("Unable to read memory/target\n");
- return;
- }
-
- set_new_target(new_target >> PAGE_SHIFT);
+ set_new_target(new_target >> PAGE_SHIFT);
}
-/*
- Try to set up our watcher, if not already set
-
+/* Set up our watcher
+ NB: Assumes xenbus_lock is held!
*/
-void balloon_init_watcher(void)
-{
- int err;
-
- if(!xen_start_info.store_evtchn)
- {
- IPRINTK("Delaying watcher init until xenstore is available\n");
- return;
- }
-
- down(&xenbus_lock);
-
- if(! balloon_xenbus_init)
- {
- err = register_xenbus_watch(&xb_watch);
- if(err)
- {
- /* BIG FAT FIXME: dom0 sequencing workaround
- * dom0 can't set a watch on memory/target until
- * after the tools create it. So, we have to watch
- * the whole store until that happens.
- *
- * This will go away when we have the ability to watch
- * non-existant keys
- */
- register_xenbus_watch(&root_watch);
- }
- else
- {
- IPRINTK("Balloon xenbus watcher initialized\n");
- balloon_xenbus_init = 1;
- }
- }
-
- up(&xenbus_lock);
-
-}
-
-EXPORT_SYMBOL(balloon_init_watcher);
+int balloon_init_watcher(struct notifier_block *notifier,
+ unsigned long event,
+ void *data)
+{
+ int err;
+
+ BUG_ON(down_trylock(&xenbus_lock) == 0);
+
+ err = register_xenbus_watch(&target_watch);
+ if (err)
+ printk(KERN_ERR "Failed to set balloon watcher\n");
+
+ return NOTIFY_DONE;
+
+}
static int balloon_write(struct file *file, const char __user *buffer,
unsigned long count, void *data)
{
- char memstring[64], *endchar;
- unsigned long long target_bytes;
-
- if ( !capable(CAP_SYS_ADMIN) )
- return -EPERM;
-
- if ( count <= 1 )
- return -EBADMSG; /* runt */
- if ( count > sizeof(memstring) )
- return -EFBIG; /* too long */
-
- if ( copy_from_user(memstring, buffer, count) )
- return -EFAULT;
- memstring[sizeof(memstring)-1] = '\0';
-
- target_bytes = memparse(memstring, &endchar);
- set_new_target(target_bytes >> PAGE_SHIFT);
-
- return count;
+ char memstring[64], *endchar;
+ unsigned long long target_bytes;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (count <= 1)
+ return -EBADMSG; /* runt */
+ if (count > sizeof(memstring))
+ return -EFBIG; /* too long */
+
+ if (copy_from_user(memstring, buffer, count))
+ return -EFAULT;
+ memstring[sizeof(memstring)-1] = '\0';
+
+ target_bytes = memparse(memstring, &endchar);
+ set_new_target(target_bytes >> PAGE_SHIFT);
+
+ return count;
}
static int balloon_read(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len;
-
- len = sprintf(
- page,
- "Current allocation: %8lu kB\n"
- "Requested target: %8lu kB\n"
- "Low-mem balloon: %8lu kB\n"
- "High-mem balloon: %8lu kB\n"
- "Xen hard limit: ",
- PAGES2KB(current_pages), PAGES2KB(target_pages),
- PAGES2KB(balloon_low), PAGES2KB(balloon_high));
-
- if ( hard_limit != ~0UL )
- len += sprintf(
- page + len,
- "%8lu kB (inc. %8lu kB driver headroom)\n",
- PAGES2KB(hard_limit), PAGES2KB(driver_pages));
- else
- len += sprintf(
- page + len,
- " ??? kB\n");
-
- *eof = 1;
- return len;
-}
+ int len;
+
+ len = sprintf(
+ page,
+ "Current allocation: %8lu kB\n"
+ "Requested target: %8lu kB\n"
+ "Low-mem balloon: %8lu kB\n"
+ "High-mem balloon: %8lu kB\n"
+ "Xen hard limit: ",
+ PAGES2KB(current_pages), PAGES2KB(target_pages),
+ PAGES2KB(balloon_low), PAGES2KB(balloon_high));
+
+ if (hard_limit != ~0UL) {
+ len += sprintf(
+ page + len,
+ "%8lu kB (inc. %8lu kB driver headroom)\n",
+ PAGES2KB(hard_limit), PAGES2KB(driver_pages));
+ } else {
+ len += sprintf(
+ page + len,
+ " ??? kB\n");
+ }
+
+ *eof = 1;
+ return len;
+}
+
+static struct notifier_block xenstore_notifier;
static int __init balloon_init(void)
{
- unsigned long pfn;
- struct page *page;
-
- IPRINTK("Initialising balloon driver.\n");
-
- current_pages = min(xen_start_info.nr_pages, max_pfn);
- target_pages = current_pages;
- balloon_low = 0;
- balloon_high = 0;
- driver_pages = 0UL;
- hard_limit = ~0UL;
-
- init_timer(&balloon_timer);
- balloon_timer.data = 0;
- balloon_timer.function = balloon_alarm;
+ unsigned long pfn;
+ struct page *page;
+
+ IPRINTK("Initialising balloon driver.\n");
+
+ current_pages = min(xen_start_info.nr_pages, max_pfn);
+ target_pages = current_pages;
+ balloon_low = 0;
+ balloon_high = 0;
+ driver_pages = 0UL;
+ hard_limit = ~0UL;
+
+ init_timer(&balloon_timer);
+ balloon_timer.data = 0;
+ balloon_timer.function = balloon_alarm;
- if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL )
- {
- WPRINTK("Unable to create /proc/xen/balloon.\n");
- return -1;
- }
-
- balloon_pde->read_proc = balloon_read;
- balloon_pde->write_proc = balloon_write;
+ if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
+ WPRINTK("Unable to create /proc/xen/balloon.\n");
+ return -1;
+ }
+
+ balloon_pde->read_proc = balloon_read;
+ balloon_pde->write_proc = balloon_write;
- /* Initialise the balloon with excess memory space. */
- for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ )
- {
- page = &mem_map[pfn];
- if ( !PageReserved(page) )
- balloon_append(page);
- }
-
- xb_watch.callback = watch_target;
- root_watch.callback = watch_target;
-
- balloon_init_watcher();
-
- return 0;
+ /* Initialise the balloon with excess memory space. */
+ for (pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++) {
+ page = &mem_map[pfn];
+ if (!PageReserved(page))
+ balloon_append(page);
+ }
+
+ target_watch.callback = watch_target;
+ xenstore_notifier.notifier_call = balloon_init_watcher;
+
+ register_xenstore_notifier(&xenstore_notifier);
+
+ return 0;
}
subsys_initcall(balloon_init);
void balloon_update_driver_allowance(long delta)
{
- unsigned long flags;
- balloon_lock(flags);
- driver_pages += delta; /* non-atomic update */
- balloon_unlock(flags);
-}
-
-void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns)
-{
- unsigned long flags;
-
- balloon_lock(flags);
- if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
- mfn_list, nr_mfns, 0) != nr_mfns )
- BUG();
- current_pages -= nr_mfns; /* non-atomic update */
- balloon_unlock(flags);
-
- schedule_work(&balloon_worker);
+ unsigned long flags;
+ balloon_lock(flags);
+ driver_pages += delta; /* non-atomic update */
+ balloon_unlock(flags);
+}
+
+static int dealloc_pte_fn(
+ pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+ unsigned long mfn = pte_mfn(*pte);
+ set_pte(pte, __pte_ma(0));
+ phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] =
+ INVALID_P2M_ENTRY;
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+ return 0;
+}
+
+struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
+{
+ unsigned long vstart, flags;
+ unsigned int order = get_order(nr_pages * PAGE_SIZE);
+
+ vstart = __get_free_pages(GFP_KERNEL, order);
+ if (vstart == 0)
+ return NULL;
+
+ scrub_pages(vstart, 1 << order);
+
+ balloon_lock(flags);
+ BUG_ON(generic_page_range(
+ &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL));
+ current_pages -= 1UL << order;
+ balloon_unlock(flags);
+
+ schedule_work(&balloon_worker);
+
+ flush_tlb_all();
+
+ return virt_to_page(vstart);
+}
+
+void balloon_dealloc_empty_page_range(
+ struct page *page, unsigned long nr_pages)
+{
+ unsigned long i, flags;
+ unsigned int order = get_order(nr_pages * PAGE_SIZE);
+
+ balloon_lock(flags);
+ for (i = 0; i < (1UL << order); i++)
+ balloon_append(page + i);
+ balloon_unlock(flags);
+
+ schedule_work(&balloon_worker);
}
EXPORT_SYMBOL(balloon_update_driver_allowance);
-EXPORT_SYMBOL(balloon_put_pages);
+EXPORT_SYMBOL(balloon_alloc_empty_page_range);
+EXPORT_SYMBOL(balloon_dealloc_empty_page_range);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blkback/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Thu Aug 25 22:53:20 2005
@@ -1,2 +1,2 @@
-obj-y := blkback.o control.o interface.o vbd.o
+obj-y := blkback.o xenbus.o interface.o vbd.o
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Thu Aug 25
22:53:20 2005
@@ -11,11 +11,9 @@
* Copyright (c) 2005, Christopher Clark
*/
+#include <linux/spinlock.h>
+#include <asm-xen/balloon.h>
#include "common.h"
-#include <asm-xen/evtchn.h>
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-#include <asm-xen/xen-public/grant_table.h>
-#endif
/*
* These are rather arbitrary. They are fairly large because adjacent requests
@@ -67,9 +65,6 @@
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-static kmem_cache_t *buffer_head_cachep;
-#else
static request_queue_t *plugged_queue;
static inline void flush_plugged_queue(void)
{
@@ -82,9 +77,7 @@
plugged_queue = NULL;
}
}
-#endif
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
+
/* When using grant tables to map a frame for device access then the
* handle returned must be used to unmap the frame. This is needed to
* drop the ref count on the frame.
@@ -93,7 +86,6 @@
#define pending_handle(_idx, _i) \
(pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
#define BLKBACK_INVALID_HANDLE (0xFFFF)
-#endif
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/*
@@ -108,14 +100,12 @@
#endif
static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
static void make_response(blkif_t *blkif, unsigned long id,
unsigned short op, int st);
static void fast_flush_area(int idx, int nr_pages)
{
-#ifdef CONFIG_XEN_BLKDEV_GRANT
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int i, invcount = 0;
u16 handle;
@@ -124,31 +114,16 @@
{
if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
{
- unmap[i].host_virt_addr = MMAP_VADDR(idx, i);
+ unmap[i].host_addr = MMAP_VADDR(idx, i);
unmap[i].dev_bus_addr = 0;
unmap[i].handle = handle;
- pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+ pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
invcount++;
}
}
if ( unlikely(HYPERVISOR_grant_table_op(
GNTTABOP_unmap_grant_ref, unmap, invcount)))
BUG();
-#else
-
- multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- int i;
-
- for ( i = 0; i < nr_pages; i++ )
- {
- MULTI_update_va_mapping(mcl+i, MMAP_VADDR(idx, i),
- __pte(0), 0);
- }
-
- mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
- if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
- BUG();
-#endif
}
@@ -205,11 +180,7 @@
blkif_t *blkif;
struct list_head *ent;
- daemonize(
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- "xenblkd"
-#endif
- );
+ daemonize("xenblkd");
for ( ; ; )
{
@@ -236,11 +207,7 @@
}
/* Push the batch through to disc. */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- run_task_queue(&tq_disk);
-#else
flush_plugged_queue();
-#endif
}
}
@@ -289,13 +256,6 @@
}
}
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-static void end_block_io_op(struct buffer_head *bh, int uptodate)
-{
- __end_block_io_op(bh->b_private, uptodate);
- kmem_cache_free(buffer_head_cachep, bh);
-}
-#else
static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
if ( bio->bi_size != 0 )
@@ -304,7 +264,6 @@
bio_put(bio);
return error;
}
-#endif
/******************************************************************************
@@ -351,10 +310,6 @@
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
dispatch_rw_block_io(blkif, req);
- break;
-
- case BLKIF_OP_PROBE:
- dispatch_probe(blkif, req);
break;
default:
@@ -369,72 +324,6 @@
return more_to_do;
}
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
-{
- int rsp = BLKIF_RSP_ERROR;
- int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-
- /* We expect one buffer only. */
- if ( unlikely(req->nr_segments != 1) )
- goto out;
-
- /* Make sure the buffer is page-sized. */
- if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
- (blkif_last_sect(req->frame_and_sects[0]) != 7) )
- goto out;
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- {
- struct gnttab_map_grant_ref map;
-
- map.host_virt_addr = MMAP_VADDR(pending_idx, 0);
- map.flags = GNTMAP_host_map;
- map.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
- map.dom = blkif->domid;
-
- if ( unlikely(HYPERVISOR_grant_table_op(
- GNTTABOP_map_grant_ref, &map, 1)))
- BUG();
-
- if ( map.handle < 0 )
- goto out;
-
- pending_handle(pending_idx, 0) = map.handle;
- }
-#else /* else CONFIG_XEN_BLKDEV_GRANT */
-
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
- /* Grab the real frontend out of the probe message. */
- if (req->frame_and_sects[1] == BLKTAP_COOKIE)
- blkif->is_blktap = 1;
-#endif
-
-
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
- if ( HYPERVISOR_update_va_mapping_otherdomain(
- MMAP_VADDR(pending_idx, 0),
- (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
- 0, (blkif->is_blktap ? ID_TO_DOM(req->id) : blkif->domid) ) )
-
- goto out;
-#else
- if ( HYPERVISOR_update_va_mapping_otherdomain(
- MMAP_VADDR(pending_idx, 0),
- (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
- 0, blkif->domid) )
-
- goto out;
-#endif
-#endif /* endif CONFIG_XEN_BLKDEV_GRANT */
-
- rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0),
- PAGE_SIZE / sizeof(vdisk_t));
-
- out:
- fast_flush_area(pending_idx, 1);
- make_response(blkif, req->id, req->operation, rsp);
-}
-
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
@@ -442,24 +331,15 @@
unsigned long fas = 0;
int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
pending_req_t *pending_req;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-#else
- unsigned long remap_prot;
- multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-#endif
struct phys_req preq;
struct {
unsigned long buf; unsigned int nsec;
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int nseg;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- struct buffer_head *bh;
-#else
struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int nbio = 0;
request_queue_t *q;
-#endif
/* Check that number of segments is sane. */
nseg = req->nr_segments;
@@ -470,11 +350,10 @@
goto bad_descriptor;
}
- preq.dev = req->device;
+ preq.dev = req->handle;
preq.sector_number = req->sector_number;
preq.nr_sects = 0;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
for ( i = 0; i < nseg; i++ )
{
fas = req->frame_and_sects[i];
@@ -484,7 +363,7 @@
goto bad_descriptor;
preq.nr_sects += seg[i].nsec;
- map[i].host_virt_addr = MMAP_VADDR(pending_idx, i);
+ map[i].host_addr = MMAP_VADDR(pending_idx, i);
map[i].dom = blkif->domid;
map[i].ref = blkif_gref_from_fas(fas);
map[i].flags = GNTMAP_host_map;
@@ -506,25 +385,15 @@
}
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- FOREIGN_FRAME(map[i].dev_bus_addr);
+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
pending_handle(pending_idx, i) = map[i].handle;
}
-#endif
for ( i = 0; i < nseg; i++ )
{
fas = req->frame_and_sects[i];
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- seg[i].buf = (map[i].dev_bus_addr << PAGE_SHIFT) |
- (blkif_first_sect(fas) << 9);
-#else
- seg[i].buf = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
- seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
- if ( seg[i].nsec <= 0 )
- goto bad_descriptor;
- preq.nr_sects += seg[i].nsec;
-#endif
+ seg[i].buf = map[i].dev_bus_addr | (blkif_first_sect(fas) << 9);
}
if ( vbd_translate(&preq, blkif, operation) != 0 )
@@ -534,40 +403,6 @@
preq.sector_number + preq.nr_sects, preq.dev);
goto bad_descriptor;
}
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- if ( operation == READ )
- remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
- else
- remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;
-
-
- for ( i = 0; i < nseg; i++ )
- {
- MULTI_update_va_mapping_otherdomain(
- mcl+i, MMAP_VADDR(pending_idx, i),
- pfn_pte_ma(seg[i].buf >> PAGE_SHIFT, __pgprot(remap_prot)),
- 0, blkif->domid);
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
- if ( blkif->is_blktap )
- mcl[i].args[MULTI_UVMDOMID_INDEX] = ID_TO_DOM(req->id);
-#endif
- phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT);
- }
-
- BUG_ON(HYPERVISOR_multicall(mcl, nseg) != 0);
-
- for ( i = 0; i < nseg; i++ )
- {
- if ( unlikely(mcl[i].result != 0) )
- {
- DPRINTK("invalid buffer -- could not remap it\n");
- fast_flush_area(pending_idx, nseg);
- goto bad_descriptor;
- }
- }
-#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */
pending_req = &pending_reqs[pending_idx];
pending_req->blkif = blkif;
@@ -575,49 +410,6 @@
pending_req->operation = operation;
pending_req->status = BLKIF_RSP_OKAY;
pending_req->nr_pages = nseg;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-
- atomic_set(&pending_req->pendcnt, nseg);
- pending_cons++;
- blkif_get(blkif);
-
- for ( i = 0; i < nseg; i++ )
- {
- bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
- if ( unlikely(bh == NULL) )
- {
- __end_block_io_op(pending_req, 0);
- continue;
- }
-
- memset(bh, 0, sizeof (struct buffer_head));
-
- init_waitqueue_head(&bh->b_wait);
- bh->b_size = seg[i].nsec << 9;
- bh->b_dev = preq.dev;
- bh->b_rdev = preq.dev;
- bh->b_rsector = (unsigned long)preq.sector_number;
- bh->b_data = (char *)MMAP_VADDR(pending_idx, i) +
- (seg[i].buf & ~PAGE_MASK);
- bh->b_page = virt_to_page(MMAP_VADDR(pending_idx, i));
- bh->b_end_io = end_block_io_op;
- bh->b_private = pending_req;
-
- bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) |
- (1 << BH_Req) | (1 << BH_Launder);
- if ( operation == WRITE )
- bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
-
- atomic_set(&bh->b_count, 1);
-
- /* Dispatch a single request. We'll flush it to disc later. */
- generic_make_request(operation, bh);
-
- preq.sector_number += seg[i].nsec;
- }
-
-#else
for ( i = 0; i < nseg; i++ )
{
@@ -667,8 +459,6 @@
for ( i = 0; i < nbio; i++ )
submit_bio(operation, biolist[i]);
-#endif
-
return;
bad_descriptor:
@@ -712,6 +502,7 @@
static int __init blkif_init(void)
{
int i;
+ struct page *page;
if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
!(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
@@ -719,8 +510,9 @@
blkif_interface_init();
- if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
- BUG();
+ page = balloon_alloc_empty_page_range(MMAP_PAGES);
+ BUG_ON(page == NULL);
+ mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
pending_cons = 0;
pending_prod = MAX_PENDING_REQS;
@@ -734,18 +526,9 @@
if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
BUG();
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- buffer_head_cachep = kmem_cache_create(
- "buffer_head_cache", sizeof(struct buffer_head),
- 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-#endif
-
- blkif_ctrlif_init();
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
+ blkif_xenbus_init();
+
memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES );
- printk(KERN_ALERT "Blkif backend is using grant tables.\n");
-#endif
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Thu Aug 25 22:53:20 2005
@@ -5,17 +5,18 @@
#include <linux/config.h>
#include <linux/version.h>
#include <linux/module.h>
-#include <linux/rbtree.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
+#include <linux/vmalloc.h>
#include <asm/io.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
-#include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
#include <asm-xen/hypervisor.h>
#include <asm-xen/xen-public/io/blkif.h>
#include <asm-xen/xen-public/io/ring.h>
+#include <asm-xen/gnttab.h>
#if 0
#define ASSERT(_p) \
@@ -28,12 +29,13 @@
#define DPRINTK(_f, _a...) ((void)0)
#endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-typedef struct rb_root rb_root_t;
-typedef struct rb_node rb_node_t;
-#else
-struct block_device;
-#endif
+struct vbd {
+ blkif_vdev_t handle; /* what the domain refers to this vbd as */
+ unsigned char readonly; /* Non-zero -> read-only */
+ unsigned char type; /* VDISK_xxx */
+ blkif_pdev_t pdevice; /* phys device that this vbd maps to */
+ struct block_device *bdev;
+};
typedef struct blkif_st {
/* Unique identifier for this interface. */
@@ -42,34 +44,25 @@
/* Physical parameters of the comms window. */
unsigned long shmem_frame;
unsigned int evtchn;
- int irq;
+ unsigned int remote_evtchn;
/* Comms information. */
blkif_back_ring_t blk_ring;
/* VBDs attached to this interface. */
- rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs.*/
- spinlock_t vbd_lock; /* Protects VBD mapping. */
+ struct vbd vbd;
/* Private fields. */
- enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
- /*
- * DISCONNECT response is deferred until pending requests are ack'ed.
- * We therefore need to store the id from the original request.
- */
- u8 disconnect_rspid;
+ enum { DISCONNECTED, CONNECTED } status;
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/* Is this a blktap frontend */
unsigned int is_blktap;
#endif
- struct blkif_st *hash_next;
struct list_head blkdev_list;
spinlock_t blk_ring_lock;
atomic_t refcnt;
- struct work_struct work;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
+ struct work_struct free_work;
u16 shmem_handle;
- memory_t shmem_vaddr;
+ unsigned long shmem_vaddr;
grant_ref_t shmem_ref;
-#endif
} blkif_t;
void blkif_create(blkif_be_create_t *create);
@@ -77,18 +70,25 @@
void blkif_connect(blkif_be_connect_t *connect);
int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
void blkif_disconnect_complete(blkif_t *blkif);
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+blkif_t *alloc_blkif(domid_t domid);
+void free_blkif_callback(blkif_t *blkif);
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
+
#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define blkif_put(_b) \
do { \
if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- blkif_disconnect_complete(_b); \
+ free_blkif_callback(_b); \
} while (0)
-void vbd_create(blkif_be_vbd_create_t *create);
-void vbd_destroy(blkif_be_vbd_destroy_t *delete);
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
-void destroy_all_vbds(blkif_t *blkif);
+/* Create a vbd. */
+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice,
+ int readonly);
+void vbd_free(struct vbd *vbd);
+
+unsigned long vbd_size(struct vbd *vbd);
+unsigned int vbd_info(struct vbd *vbd);
+unsigned long vbd_secsize(struct vbd *vbd);
struct phys_req {
unsigned short dev;
@@ -100,9 +100,10 @@
int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
void blkif_interface_init(void);
-void blkif_ctrlif_init(void);
void blkif_deschedule(blkif_t *blkif);
+
+void blkif_xenbus_init(void);
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Thu Aug 25
22:53:20 2005
@@ -7,289 +7,137 @@
*/
#include "common.h"
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+#include <asm-xen/evtchn.h>
static kmem_cache_t *blkif_cachep;
-static blkif_t *blkif_hash[BLKIF_HASHSZ];
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+blkif_t *alloc_blkif(domid_t domid)
{
- blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif != NULL) &&
- ((blkif->domid != domid) || (blkif->handle != handle)) )
- blkif = blkif->hash_next;
+ blkif_t *blkif;
+
+ blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+ if (!blkif)
+ return ERR_PTR(-ENOMEM);
+
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ blkif->status = DISCONNECTED;
+ spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 1);
+
return blkif;
}
-static void __blkif_disconnect_complete(void *arg)
+static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
+ unsigned long shared_page)
{
- blkif_t *blkif = (blkif_t *)arg;
- ctrl_msg_t cmsg;
- blkif_be_disconnect_t disc;
+ struct gnttab_map_grant_ref op;
+ op.host_addr = localaddr;
+ op.flags = GNTMAP_host_map;
+ op.ref = shared_page;
+ op.dom = blkif->domid;
- /*
- * These can't be done in blkif_disconnect() because at that point there
- * may be outstanding requests at the disc whose asynchronous responses
- * must still be notified to the remote driver.
- */
- unbind_evtchn_from_irq(blkif->evtchn);
+ BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- {
- /*
- * Release the shared memory page.
- */
- struct gnttab_unmap_grant_ref op;
+ if (op.handle < 0) {
+ DPRINTK(" Grant table operation failure !\n");
+ return op.handle;
+ }
- op.host_virt_addr = blkif->shmem_vaddr;
- op.handle = blkif->shmem_handle;
- op.dev_bus_addr = 0;
-
- if(unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op,
1))) {
- BUG();
- }
- }
-#endif
- vfree(blkif->blk_ring.sring);
-
- /* Construct the deferred response message. */
- cmsg.type = CMSG_BLKIF_BE;
- cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT;
- cmsg.id = blkif->disconnect_rspid;
- cmsg.length = sizeof(blkif_be_disconnect_t);
- disc.domid = blkif->domid;
- disc.blkif_handle = blkif->handle;
- disc.status = BLKIF_BE_STATUS_OKAY;
- memcpy(cmsg.msg, &disc, sizeof(disc));
-
- /*
- * Make sure message is constructed /before/ status change, because
- * after the status change the 'blkif' structure could be deallocated at
- * any time. Also make sure we send the response /after/ status change,
- * as otherwise a subsequent CONNECT request could spuriously fail if
- * another CPU doesn't see the status change yet.
- */
- mb();
- if ( blkif->status != DISCONNECTING )
- BUG();
- blkif->status = DISCONNECTED;
- mb();
-
- /* Send the successful response. */
- ctrl_if_send_response(&cmsg);
+ blkif->shmem_ref = shared_page;
+ blkif->shmem_handle = op.handle;
+ blkif->shmem_vaddr = localaddr;
+ return 0;
}
-void blkif_disconnect_complete(blkif_t *blkif)
+static void unmap_frontend_page(blkif_t *blkif)
{
- INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
- schedule_work(&blkif->work);
+ struct gnttab_unmap_grant_ref op;
+
+ op.host_addr = blkif->shmem_vaddr;
+ op.handle = blkif->shmem_handle;
+ op.dev_bus_addr = 0;
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
}
-void blkif_create(blkif_be_create_t *create)
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
{
- domid_t domid = create->domid;
- unsigned int handle = create->blkif_handle;
- blkif_t **pblkif, *blkif;
+ struct vm_struct *vma;
+ blkif_sring_t *sring;
+ evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+ int err;
- if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
- {
- DPRINTK("Could not create blkif: out of memory\n");
- create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
+ BUG_ON(blkif->remote_evtchn);
+
+ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+ return -ENOMEM;
+
+ err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page);
+ if (err) {
+ vfree(vma->addr);
+ return err;
}
- memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
- blkif->handle = handle;
- blkif->status = DISCONNECTED;
- spin_lock_init(&blkif->vbd_lock);
- spin_lock_init(&blkif->blk_ring_lock);
- atomic_set(&blkif->refcnt, 0);
-
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( *pblkif != NULL )
- {
- if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
- {
- DPRINTK("Could not create blkif: already exists\n");
- create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
- kmem_cache_free(blkif_cachep, blkif);
- return;
- }
- pblkif = &(*pblkif)->hash_next;
+ op.u.bind_interdomain.dom1 = DOMID_SELF;
+ op.u.bind_interdomain.dom2 = blkif->domid;
+ op.u.bind_interdomain.port1 = 0;
+ op.u.bind_interdomain.port2 = evtchn;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ unmap_frontend_page(blkif);
+ vfree(vma->addr);
+ return err;
}
- blkif->hash_next = *pblkif;
- *pblkif = blkif;
+ blkif->evtchn = op.u.bind_interdomain.port1;
+ blkif->remote_evtchn = evtchn;
- DPRINTK("Successfully created blkif\n");
- create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
- domid_t domid = destroy->domid;
- unsigned int handle = destroy->blkif_handle;
- blkif_t **pblkif, *blkif;
-
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif = *pblkif) != NULL )
- {
- if ( (blkif->domid == domid) && (blkif->handle == handle) )
- {
- if ( blkif->status != DISCONNECTED )
- goto still_connected;
- goto destroy;
- }
- pblkif = &blkif->hash_next;
- }
-
- destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
-
- still_connected:
- destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
- return;
-
- destroy:
- *pblkif = blkif->hash_next;
- destroy_all_vbds(blkif);
- kmem_cache_free(blkif_cachep, blkif);
- destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_connect(blkif_be_connect_t *connect)
-{
- domid_t domid = connect->domid;
- unsigned int handle = connect->blkif_handle;
- unsigned int evtchn = connect->evtchn;
- unsigned long shmem_frame = connect->shmem_frame;
- struct vm_struct *vma;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- int ref = connect->shmem_ref;
-#else
- pgprot_t prot;
- int error;
-#endif
- blkif_t *blkif;
- blkif_sring_t *sring;
-
- blkif = blkif_find_by_handle(domid, handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n",
- connect->domid, connect->blkif_handle);
- connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
- {
- connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- prot = __pgprot(_KERNPG_TABLE);
- error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
- shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
- prot, domid);
- if ( error != 0 )
- {
- if ( error == -ENOMEM )
- connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- else if ( error == -EFAULT )
- connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
- else
- connect->status = BLKIF_BE_STATUS_ERROR;
- vfree(vma->addr);
- return;
- }
-#else
- { /* Map: Use the Grant table reference */
- struct gnttab_map_grant_ref op;
- op.host_virt_addr = VMALLOC_VMADDR(vma->addr);
- op.flags = GNTMAP_host_map;
- op.ref = ref;
- op.dom = domid;
-
- BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-
- handle = op.handle;
-
- if (op.handle < 0) {
- DPRINTK(" Grant table operation failure !\n");
- connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
- vfree(vma->addr);
- return;
- }
-
- blkif->shmem_ref = ref;
- blkif->shmem_handle = handle;
- blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr);
- }
-#endif
-
- if ( blkif->status != DISCONNECTED )
- {
- connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
- vfree(vma->addr);
- return;
- }
sring = (blkif_sring_t *)vma->addr;
SHARED_RING_INIT(sring);
BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
-
- blkif->evtchn = evtchn;
- blkif->irq = bind_evtchn_to_irq(evtchn);
- blkif->shmem_frame = shmem_frame;
+
+ bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend",
+ blkif);
blkif->status = CONNECTED;
- blkif_get(blkif);
+ blkif->shmem_frame = shared_page;
- request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
-
- connect->status = BLKIF_BE_STATUS_OKAY;
+ return 0;
}
-int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
+static void free_blkif(void *arg)
{
- domid_t domid = disconnect->domid;
- unsigned int handle = disconnect->blkif_handle;
- blkif_t *blkif;
+ evtchn_op_t op = { .cmd = EVTCHNOP_close };
+ blkif_t *blkif = (blkif_t *)arg;
- blkif = blkif_find_by_handle(domid, handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("blkif_disconnect attempted for non-existent blkif"
- " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle);
- disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return 1; /* Caller will send response error message. */
+ op.u.close.port = blkif->evtchn;
+ op.u.close.dom = DOMID_SELF;
+ HYPERVISOR_event_channel_op(&op);
+ op.u.close.port = blkif->remote_evtchn;
+ op.u.close.dom = blkif->domid;
+ HYPERVISOR_event_channel_op(&op);
+
+ vbd_free(&blkif->vbd);
+
+ if (blkif->evtchn)
+ unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
+
+ if (blkif->blk_ring.sring) {
+ unmap_frontend_page(blkif);
+ vfree(blkif->blk_ring.sring);
+ blkif->blk_ring.sring = NULL;
}
- if ( blkif->status == CONNECTED )
- {
- blkif->status = DISCONNECTING;
- blkif->disconnect_rspid = rsp_id;
- wmb(); /* Let other CPUs see the status change. */
- free_irq(blkif->irq, blkif);
- blkif_deschedule(blkif);
- blkif_put(blkif);
- return 0; /* Caller should not send response message. */
- }
+ kmem_cache_free(blkif_cachep, blkif);
+}
- disconnect->status = BLKIF_BE_STATUS_OKAY;
- return 1;
+void free_blkif_callback(blkif_t *blkif)
+{
+ INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
+ schedule_work(&blkif->free_work);
}
void __init blkif_interface_init(void)
{
blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
0, 0, NULL, NULL);
- memset(blkif_hash, 0, sizeof(blkif_hash));
}
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Thu Aug 25 22:53:20 2005
@@ -3,104 +3,61 @@
*
* Routines for managing virtual block devices (VBDs).
*
- * NOTE: vbd_lock protects updates to the rb_tree against concurrent lookups
- * in vbd_translate. All other lookups are implicitly protected because the
- * only caller (the control message dispatch routine) serializes the calls.
- *
* Copyright (c) 2003-2005, Keir Fraser & Steve Hand
*/
#include "common.h"
+#include <asm-xen/xenbus.h>
-struct vbd {
- blkif_vdev_t vdevice; /* what the domain refers to this vbd as */
- unsigned char readonly; /* Non-zero -> read-only */
- unsigned char type; /* VDISK_xxx */
- blkif_pdev_t pdevice; /* phys device that this vbd maps to */
- struct block_device *bdev;
- rb_node_t rb; /* for linking into R-B tree lookup struct */
-};
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static inline dev_t vbd_map_devnum(blkif_pdev_t cookie)
-{ return MKDEV(cookie>>8, cookie&0xff); }
+{
+ return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie));
+}
#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
(_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
#define bdev_put(_b) blkdev_put(_b)
-#else
-#define vbd_sz(_v) (blk_size[MAJOR((_v)->pdevice)][MINOR((_v)->pdevice)]*2)
-#define bdev_put(_b) ((void)0)
-#define bdev_hardsect_size(_b) 512
-#endif
-void vbd_create(blkif_be_vbd_create_t *create)
+unsigned long vbd_size(struct vbd *vbd)
{
- struct vbd *vbd;
- rb_node_t **rb_p, *rb_parent = NULL;
- blkif_t *blkif;
- blkif_vdev_t vdevice = create->vdevice;
+ return vbd_sz(vbd);
+}
- blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n",
- create->domid, create->blkif_handle);
- create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
+unsigned int vbd_info(struct vbd *vbd)
+{
+ return vbd->type | (vbd->readonly?VDISK_READONLY:0);
+}
- rb_p = &blkif->vbd_rb.rb_node;
- while ( *rb_p != NULL )
- {
- rb_parent = *rb_p;
- vbd = rb_entry(rb_parent, struct vbd, rb);
- if ( vdevice < vbd->vdevice )
- {
- rb_p = &rb_parent->rb_left;
- }
- else if ( vdevice > vbd->vdevice )
- {
- rb_p = &rb_parent->rb_right;
- }
- else
- {
- DPRINTK("vbd_create attempted for already existing vbd\n");
- create->status = BLKIF_BE_STATUS_VBD_EXISTS;
- return;
- }
- }
+unsigned long vbd_secsize(struct vbd *vbd)
+{
+ return bdev_hardsect_size(vbd->bdev);
+}
- if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
- {
- DPRINTK("vbd_create: out of memory\n");
- create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
+int vbd_create(blkif_t *blkif, blkif_vdev_t handle,
+ blkif_pdev_t pdevice, int readonly)
+{
+ struct vbd *vbd;
- vbd->vdevice = vdevice;
- vbd->readonly = create->readonly;
+ vbd = &blkif->vbd;
+ vbd->handle = handle;
+ vbd->readonly = readonly;
vbd->type = 0;
- /* Mask to 16-bit for compatibility with old tools */
- vbd->pdevice = create->pdevice & 0xffff;
+ vbd->pdevice = pdevice;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
vbd->bdev = open_by_devnum(
vbd_map_devnum(vbd->pdevice),
vbd->readonly ? FMODE_READ : FMODE_WRITE);
if ( IS_ERR(vbd->bdev) )
{
DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
- return;
+ return -ENOENT;
}
if ( (vbd->bdev->bd_disk == NULL) )
{
DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
- bdev_put(vbd->bdev);
- return;
+ vbd_free(vbd);
+ return -ENOENT;
}
if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
@@ -108,181 +65,27 @@
if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
vbd->type |= VDISK_REMOVABLE;
-#else
- if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
- {
- DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
- return;
- }
-#endif
-
- spin_lock(&blkif->vbd_lock);
- rb_link_node(&vbd->rb, rb_parent, rb_p);
- rb_insert_color(&vbd->rb, &blkif->vbd_rb);
- spin_unlock(&blkif->vbd_lock);
-
- DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
- vdevice, create->domid);
- create->status = BLKIF_BE_STATUS_OKAY;
+ DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+ handle, blkif->domid);
+ return 0;
}
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
+void vbd_free(struct vbd *vbd)
{
- blkif_t *blkif;
- struct vbd *vbd;
- rb_node_t *rb;
- blkif_vdev_t vdevice = destroy->vdevice;
-
- blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n",
- destroy->domid, destroy->blkif_handle);
- destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, struct vbd, rb);
- if ( vdevice < vbd->vdevice )
- rb = rb->rb_left;
- else if ( vdevice > vbd->vdevice )
- rb = rb->rb_right;
- else
- goto found;
- }
-
- destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
- return;
-
- found:
- spin_lock(&blkif->vbd_lock);
- rb_erase(rb, &blkif->vbd_rb);
- spin_unlock(&blkif->vbd_lock);
- bdev_put(vbd->bdev);
- kfree(vbd);
+ if (vbd->bdev)
+ bdev_put(vbd->bdev);
+ vbd->bdev = NULL;
}
-
-
-void destroy_all_vbds(blkif_t *blkif)
-{
- struct vbd *vbd;
- rb_node_t *rb;
-
- spin_lock(&blkif->vbd_lock);
-
- while ( (rb = blkif->vbd_rb.rb_node) != NULL )
- {
- vbd = rb_entry(rb, struct vbd, rb);
- rb_erase(rb, &blkif->vbd_rb);
- spin_unlock(&blkif->vbd_lock);
- bdev_put(vbd->bdev);
- kfree(vbd);
- spin_lock(&blkif->vbd_lock);
- }
-
- spin_unlock(&blkif->vbd_lock);
-}
-
-
-static void vbd_probe_single(
- blkif_t *blkif, vdisk_t *vbd_info, struct vbd *vbd)
-{
- vbd_info->device = vbd->vdevice;
- vbd_info->info = vbd->type | (vbd->readonly ? VDISK_READONLY : 0);
- vbd_info->capacity = vbd_sz(vbd);
- vbd_info->sector_size = bdev_hardsect_size(vbd->bdev);
-}
-
-
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
-{
- int rc = 0, nr_vbds = 0;
- rb_node_t *rb;
-
- spin_lock(&blkif->vbd_lock);
-
- if ( (rb = blkif->vbd_rb.rb_node) == NULL )
- goto out;
-
- new_subtree:
- /* STEP 1. Find least node (it'll be left-most). */
- while ( rb->rb_left != NULL )
- rb = rb->rb_left;
-
- for ( ; ; )
- {
- /* STEP 2. Dealt with left subtree. Now process current node. */
- vbd_probe_single(blkif, &vbd_info[nr_vbds],
- rb_entry(rb, struct vbd, rb));
- if ( ++nr_vbds == max_vbds )
- goto out;
-
- /* STEP 3. Process right subtree, if any. */
- if ( rb->rb_right != NULL )
- {
- rb = rb->rb_right;
- goto new_subtree;
- }
-
- /* STEP 4. Done both subtrees. Head back through ancesstors. */
- for ( ; ; )
- {
- /* We're done when we get back to the root node. */
- if ( rb->rb_parent == NULL )
- goto out;
- /* If we are left of parent, then parent is next to process. */
- if ( rb->rb_parent->rb_left == rb )
- break;
- /* If we are right of parent, then we climb to grandparent. */
- rb = rb->rb_parent;
- }
-
- rb = rb->rb_parent;
- }
-
- out:
- spin_unlock(&blkif->vbd_lock);
- return (rc == 0) ? nr_vbds : rc;
-}
-
int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
{
- struct vbd *vbd;
- rb_node_t *rb;
- int rc = -EACCES;
+ struct vbd *vbd = &blkif->vbd;
+ int rc = -EACCES;
- /* Take the vbd_lock because another thread could be updating the tree. */
- spin_lock(&blkif->vbd_lock);
-
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, struct vbd, rb);
- if ( req->dev < vbd->vdevice )
- rb = rb->rb_left;
- else if ( req->dev > vbd->vdevice )
- rb = rb->rb_right;
- else
- goto found;
- }
-
- DPRINTK("vbd_translate; domain %u attempted to access "
- "non-existent VBD.\n", blkif->domid);
- rc = -ENODEV;
- goto out;
-
- found:
-
- if ( (operation == WRITE) && vbd->readonly )
+ if ((operation == WRITE) && vbd->readonly)
goto out;
- if ( unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)) )
+ if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
goto out;
req->dev = vbd->pdevice;
@@ -290,6 +93,5 @@
rc = 0;
out:
- spin_unlock(&blkif->vbd_lock);
return rc;
}
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Aug 25
22:53:20 2005
@@ -53,47 +53,26 @@
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <scsi/scsi.h>
-#include <asm-xen/ctrl_if.h>
#include <asm-xen/evtchn.h>
-#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xenbus.h>
#include <asm-xen/xen-public/grant_table.h>
#include <asm-xen/gnttab.h>
-#endif
typedef unsigned char byte; /* from linux/ide.h */
/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 1
-#if ENABLE_VBD_UPDATE
-static void vbd_update(void);
-#else
-static void vbd_update(void){};
-#endif
-
-#define BLKIF_STATE_CLOSED 0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED 2
-
-static int blkif_handle = 0;
-static unsigned int blkif_state = BLKIF_STATE_CLOSED;
-static unsigned int blkif_evtchn = 0;
-static unsigned int blkif_irq = 0;
-
-static int blkif_control_rsp_valid;
-static blkif_response_t blkif_control_rsp;
-
-static blkif_front_ring_t blk_ring;
+#define BLKIF_STATE_DISCONNECTED 0
+#define BLKIF_STATE_CONNECTED 1
+
+static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED;
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-static domid_t rdomid = 0;
-static grant_ref_t gref_head, gref_terminal;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
#define GRANTREF_INVALID (1<<15)
-#endif
static struct blk_shadow {
blkif_request_t req;
@@ -104,9 +83,9 @@
static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
-static void kick_pending_request_queues(void);
-
-int __init xlblk_init(void);
+static void kick_pending_request_queues(struct blkfront_info *info);
+
+static int __init xlblk_init(void);
static void blkif_completion(struct blk_shadow *s);
@@ -131,7 +110,7 @@
/* Kernel-specific definitions used in the common code */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define DISABLE_SCATTERGATHER()
+#define DISABLE_SCATTERGATHER()
#else
static int sg_operation = -1;
#define DISABLE_SCATTERGATHER() (sg_operation = -1)
@@ -139,38 +118,22 @@
static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
{
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- int i;
-#endif
s->req = *r;
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- for ( i = 0; i < r->nr_segments; i++ )
- s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]);
-#endif
}
static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
{
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- int i;
-#endif
*r = s->req;
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- for ( i = 0; i < s->req.nr_segments; i++ )
- r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]);
-#endif
-}
-
-
-static inline void flush_requests(void)
+}
+
+
+static inline void flush_requests(struct blkfront_info *info)
{
DISABLE_SCATTERGATHER();
- RING_PUSH_REQUESTS(&blk_ring);
- notify_via_evtchn(blkif_evtchn);
+ RING_PUSH_REQUESTS(&info->ring);
+ notify_via_evtchn(info->evtchn);
}
@@ -180,58 +143,45 @@
module_init(xlblk_init);
-#if ENABLE_VBD_UPDATE
-static void update_vbds_task(void *unused)
-{
- xlvbd_update_vbds();
-}
-
-static void vbd_update(void)
-{
- static DECLARE_WORK(update_tq, update_vbds_task, NULL);
- schedule_work(&update_tq);
-}
-#endif /* ENABLE_VBD_UPDATE */
-
-static struct xlbd_disk_info *head_waiting = NULL;
-static void kick_pending_request_queues(void)
-{
- struct xlbd_disk_info *di;
- while ( ((di = head_waiting) != NULL) && !RING_FULL(&blk_ring) )
- {
- head_waiting = di->next_waiting;
- di->next_waiting = NULL;
- /* Re-enable calldowns. */
- blk_start_queue(di->rq);
- /* Kick things off immediately. */
- do_blkif_request(di->rq);
- }
+static void kick_pending_request_queues(struct blkfront_info *info)
+{
+ if (!RING_FULL(&info->ring)) {
+ /* Re-enable calldowns. */
+ blk_start_queue(info->rq);
+ /* Kick things off immediately. */
+ do_blkif_request(info->rq);
+ }
+}
+
+static void blkif_restart_queue(void *arg)
+{
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+ spin_lock_irq(&blkif_io_lock);
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
+}
+
+static void blkif_restart_queue_callback(void *arg)
+{
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+ schedule_work(&info->work);
}
int blkif_open(struct inode *inode, struct file *filep)
{
- struct gendisk *gd = inode->i_bdev->bd_disk;
- struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
- /* Update of usage count is protected by per-device semaphore. */
- di->mi->usage++;
-
- return 0;
+ // struct gendisk *gd = inode->i_bdev->bd_disk;
+ // struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
+
+ /* Update of usage count is protected by per-device semaphore. */
+ // di->mi->usage++;
+
+ return 0;
}
int blkif_release(struct inode *inode, struct file *filep)
{
- struct gendisk *gd = inode->i_bdev->bd_disk;
- struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
- /*
- * When usage drops to zero it may allow more VBD updates to occur.
- * Update of usage count is protected by a per-device semaphore.
- */
- if ( --di->mi->usage == 0 )
- vbd_update();
-
+ /* FIXME: This is where we can actually free up majors, etc. --RR */
return 0;
}
@@ -242,8 +192,8 @@
int i;
DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
- command, (long)argument, inode->i_rdev);
-
+ command, (long)argument, inode->i_rdev);
+
switch ( command )
{
case HDIO_GETGEO:
@@ -269,7 +219,7 @@
/*
* blkif_queue_request
*
- * request block io
+ * request block io
*
* id: for guest use only.
* operation: BLKIF_OP_{READ,WRITE,PROBE}
@@ -278,7 +228,7 @@
*/
static int blkif_queue_request(struct request *req)
{
- struct xlbd_disk_info *di = req->rq_disk->private_data;
+ struct blkfront_info *info = req->rq_disk->private_data;
unsigned long buffer_ma;
blkif_request_t *ring_req;
struct bio *bio;
@@ -286,23 +236,29 @@
int idx;
unsigned long id;
unsigned int fsect, lsect;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
int ref;
-#endif
-
- if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
+ grant_ref_t gref_head;
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return 1;
+ if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST,
+ &gref_head) < 0) {
+ gnttab_request_free_callback(&info->callback,
+ blkif_restart_queue_callback, info,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ return 1;
+ }
+
/* Fill out a communications ring structure. */
- ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
id = GET_ID_FROM_FREELIST();
blk_shadow[id].request = (unsigned long)req;
ring_req->id = id;
- ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
- BLKIF_OP_READ;
+ ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ;
ring_req->sector_number = (blkif_sector_t)req->sector;
- ring_req->device = di->xd_device;
+ ring_req->handle = info->handle;
ring_req->nr_segments = 0;
rq_for_each_bio(bio, req)
@@ -314,38 +270,35 @@
buffer_ma = page_to_phys(bvec->bv_page);
fsect = bvec->bv_offset >> 9;
lsect = fsect + (bvec->bv_len >> 9) - 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
/* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ref = gnttab_claim_grant_reference(&gref_head);
ASSERT( ref != -ENOSPC );
gnttab_grant_foreign_access_ref(
ref,
- rdomid,
+ info->backend_id,
buffer_ma >> PAGE_SHIFT,
rq_data_dir(req) );
blk_shadow[id].frame[ring_req->nr_segments] =
buffer_ma >> PAGE_SHIFT;
- ring_req->frame_and_sects[ring_req->nr_segments++] =
+ ring_req->frame_and_sects[ring_req->nr_segments] =
blkif_fas_from_gref(ref, fsect, lsect);
-#else
- ring_req->frame_and_sects[ring_req->nr_segments++] =
- blkif_fas(buffer_ma, fsect, lsect);
-#endif
+ ring_req->nr_segments++;
}
}
- blk_ring.req_prod_pvt++;
-
+ info->ring.req_prod_pvt++;
+
/* Keep a private copy so we can reissue requests when recovering. */
pickle_request(&blk_shadow[id], ring_req);
+ gnttab_free_grant_references(gref_head);
+
return 0;
}
-
/*
* do_blkif_request
@@ -353,24 +306,26 @@
*/
void do_blkif_request(request_queue_t *rq)
{
- struct xlbd_disk_info *di;
+ struct blkfront_info *info = NULL;
struct request *req;
int queued;
- DPRINTK("Entered do_blkif_request\n");
+ DPRINTK("Entered do_blkif_request\n");
queued = 0;
while ( (req = elv_next_request(rq)) != NULL )
{
+ info = req->rq_disk->private_data;
+
if ( !blk_fs_request(req) )
{
end_request(req, 0);
continue;
}
- if ( RING_FULL(&blk_ring) )
- goto wait;
+ if (RING_FULL(&info->ring))
+ goto wait;
DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
req, req->cmd, req->sector, req->current_nr_sectors,
@@ -378,25 +333,19 @@
rq_data_dir(req) ? "write" : "read");
blkdev_dequeue_request(req);
- if ( blkif_queue_request(req) )
- {
+ if (blkif_queue_request(req)) {
+ blk_requeue_request(rq, req);
wait:
- di = req->rq_disk->private_data;
- if ( di->next_waiting == NULL )
- {
- di->next_waiting = head_waiting;
- head_waiting = di;
- /* Avoid pointless unplugs. */
- blk_stop_queue(rq);
- }
- break;
+ /* Avoid pointless unplugs. */
+ blk_stop_queue(rq);
+ break;
}
queued++;
}
if ( queued != 0 )
- flush_requests();
+ flush_requests(info);
}
@@ -405,25 +354,24 @@
struct request *req;
blkif_response_t *bret;
RING_IDX i, rp;
- unsigned long flags;
-
- spin_lock_irqsave(&blkif_io_lock, flags);
-
- if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
- unlikely(recovery) )
- {
+ unsigned long flags;
+ struct blkfront_info *info = (struct blkfront_info *)dev_id;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) {
spin_unlock_irqrestore(&blkif_io_lock, flags);
return IRQ_HANDLED;
}
-
- rp = blk_ring.sring->rsp_prod;
+
+ rp = info->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for ( i = blk_ring.rsp_cons; i != rp; i++ )
+ for ( i = info->ring.rsp_cons; i != rp; i++ )
{
unsigned long id;
- bret = RING_GET_RESPONSE(&blk_ring, i);
+ bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
req = (struct request *)blk_shadow[id].request;
@@ -440,25 +388,21 @@
bret->status);
if ( unlikely(end_that_request_first
- (req,
+ (req,
(bret->status == BLKIF_RSP_OKAY),
req->hard_nr_sectors)) )
BUG();
end_that_request_last(req);
break;
- case BLKIF_OP_PROBE:
- memcpy(&blkif_control_rsp, bret, sizeof(*bret));
- blkif_control_rsp_valid = 1;
- break;
default:
BUG();
}
}
- blk_ring.rsp_cons = i;
-
- kick_pending_request_queues();
+ info->ring.rsp_cons = i;
+
+ kick_pending_request_queues(info);
spin_unlock_irqrestore(&blkif_io_lock, flags);
@@ -484,56 +428,34 @@
#define blkif_io_lock io_request_lock
/*============================================================================*/
-#if ENABLE_VBD_UPDATE
-
-/*
- * blkif_update_int/update-vbds_task - handle VBD update events.
- * Schedule a task for keventd to run, which will update the VBDs and perform
- * the corresponding updates to our view of VBD state.
- */
-static void update_vbds_task(void *unused)
-{
- xlvbd_update_vbds();
-}
-
-static void vbd_update(void)
-{
- static struct tq_struct update_tq;
- update_tq.routine = update_vbds_task;
- schedule_task(&update_tq);
-}
-
-#endif /* ENABLE_VBD_UPDATE */
-/*============================================================================*/
-
static void kick_pending_request_queues(void)
{
/* We kick pending request queues if the ring is reasonably empty. */
- if ( (nr_pending != 0) &&
- (RING_PENDING_REQUESTS(&blk_ring) < (BLK_RING_SIZE >> 1)) )
+ if ( (nr_pending != 0) &&
+ (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) )
{
/* Attempt to drain the queue, but bail if the ring becomes full. */
- while ( (nr_pending != 0) && !RING_FULL(&blk_ring) )
+ while ( (nr_pending != 0) && !RING_FULL(&info->ring) )
do_blkif_request(pending_queues[--nr_pending]);
}
}
int blkif_open(struct inode *inode, struct file *filep)
{
- short xldev = inode->i_rdev;
+ short xldev = inode->i_rdev;
struct gendisk *gd = get_gendisk(xldev);
xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
- short minor = MINOR(xldev);
+ short minor = MINOR(xldev);
if ( gd->part[minor].nr_sects == 0 )
- {
+ {
/*
* Device either doesn't exist, or has zero capacity; we use a few
* cheesy heuristics to return the relevant error code
*/
if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
((minor & (gd->max_p - 1)) != 0) )
- {
+ {
/*
* We have a real device, but no such partition, or we just have a
* partition number so guess this is the problem.
@@ -542,16 +464,16 @@
}
else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
{
- /* This is a removable device => assume that media is missing. */
+ /* This is a removable device => assume that media is missing. */
return -ENOMEDIUM; /* media not present (this is a guess) */
- }
+ }
else
- {
+ {
/* Just go for the general 'no such device' error. */
return -ENODEV; /* no such device */
}
}
-
+
/* Update of usage count is protected by per-device semaphore. */
disk->usage++;
@@ -580,24 +502,24 @@
{
kdev_t dev = inode->i_rdev;
struct hd_geometry *geo = (struct hd_geometry *)argument;
- struct gendisk *gd;
- struct hd_struct *part;
+ struct gendisk *gd;
+ struct hd_struct *part;
int i;
unsigned short cylinders;
byte heads, sectors;
/* NB. No need to check permissions. That is done for us. */
-
+
DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
- command, (long) argument, dev);
-
+ command, (long) argument, dev);
+
gd = get_gendisk(dev);
- part = &gd->part[MINOR(dev)];
+ part = &gd->part[MINOR(dev)];
switch ( command )
{
case BLKGETSIZE:
- DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
+ DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
return put_user(part->nr_sects, (unsigned long *) argument);
case BLKGETSIZE64:
@@ -610,7 +532,7 @@
return blkif_revalidate(dev);
case BLKSSZGET:
- return hardsect_size[MAJOR(dev)][MINOR(dev)];
+ return hardsect_size[MAJOR(dev)][MINOR(dev)];
case BLKBSZGET: /* get block size */
DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
@@ -636,7 +558,7 @@
values consistent with the size of the device */
heads = 0xff;
- sectors = 0x3f;
+ sectors = 0x3f;
cylinders = part->nr_sects / (heads * sectors);
if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
@@ -646,7 +568,7 @@
return 0;
- case HDIO_GETGEO_BIG:
+ case HDIO_GETGEO_BIG:
DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
if (!argument) return -EINVAL;
@@ -654,7 +576,7 @@
values consistent with the size of the device */
heads = 0xff;
- sectors = 0x3f;
+ sectors = 0x3f;
cylinders = part->nr_sects / (heads * sectors);
if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
@@ -678,7 +600,7 @@
WPRINTK("ioctl %08x not supported by XL blkif\n", command);
return -ENOSYS;
}
-
+
return 0;
}
@@ -698,7 +620,7 @@
xl_disk_t *disk;
unsigned long capacity;
int i, rc = 0;
-
+
if ( (bd = bdget(dev)) == NULL )
return -EINVAL;
@@ -746,7 +668,7 @@
/*
* blkif_queue_request
*
- * request block io
+ * request block io
*
* id: for guest use only.
* operation: BLKIF_OP_{READ,WRITE,PROBE}
@@ -758,7 +680,8 @@
char * buffer,
unsigned long sector_number,
unsigned short nr_sectors,
- kdev_t device)
+ kdev_t device,
+ blkif_vdev_t handle)
{
unsigned long buffer_ma = virt_to_bus(buffer);
unsigned long xid;
@@ -766,9 +689,7 @@
blkif_request_t *req;
struct buffer_head *bh;
unsigned int fsect, lsect;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
int ref;
-#endif
fsect = (buffer_ma & ~PAGE_MASK) >> 9;
lsect = fsect + nr_sectors - 1;
@@ -776,12 +697,12 @@
/* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
BUG();
- if ( lsect > 7 )
+ if ( lsect > ((PAGE_SIZE/512)-1) )
BUG();
buffer_ma &= PAGE_MASK;
- if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return 1;
switch ( operation )
@@ -789,7 +710,7 @@
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
- gd = get_gendisk(device);
+ gd = get_gendisk(device);
/*
* Update the sector_number we'll pass down as appropriate; note that
@@ -799,10 +720,10 @@
sector_number += gd->part[MINOR(device)].start_sect;
/*
- * If this unit doesn't consist of virtual partitions then we clear
+ * If this unit doesn't consist of virtual partitions then we clear
* the partn bits from the device number.
*/
- if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
+ if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
GENHD_FL_VIRT_PARTNS) )
device &= ~(gd->max_p - 1);
@@ -810,21 +731,20 @@
(sg_dev == device) &&
(sg_next_sect == sector_number) )
{
- req = RING_GET_REQUEST(&blk_ring,
- blk_ring.req_prod_pvt - 1);
+ req = RING_GET_REQUEST(&info->ring,
+ info->ring.req_prod_pvt - 1);
bh = (struct buffer_head *)id;
-
+
bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
blk_shadow[req->id].request = (unsigned long)id;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
/* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ref = gnttab_claim_grant_reference(&gref_head);
ASSERT( ref != -ENOSPC );
gnttab_grant_foreign_access_ref(
ref,
- rdomid,
+ info->backend_id,
buffer_ma >> PAGE_SHIFT,
( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
@@ -833,10 +753,6 @@
req->frame_and_sects[req->nr_segments] =
blkif_fas_from_gref(ref, fsect, lsect);
-#else
- req->frame_and_sects[req->nr_segments] =
- blkif_fas(buffer_ma, fsect, lsect);
-#endif
if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
sg_next_sect += nr_sectors;
else
@@ -847,7 +763,7 @@
return 0;
}
- else if ( RING_FULL(&blk_ring) )
+ else if ( RING_FULL(&info->ring) )
{
return 1;
}
@@ -864,7 +780,7 @@
}
/* Fill out a communications ring structure. */
- req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+ req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
xid = GET_ID_FROM_FREELIST();
blk_shadow[xid].request = (unsigned long)id;
@@ -872,31 +788,27 @@
req->id = xid;
req->operation = operation;
req->sector_number = (blkif_sector_t)sector_number;
- req->device = device;
+ req->handle = handle;
req->nr_segments = 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
/* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ref = gnttab_claim_grant_reference(&gref_head);
ASSERT( ref != -ENOSPC );
gnttab_grant_foreign_access_ref(
ref,
- rdomid,
+ info->backend_id,
buffer_ma >> PAGE_SHIFT,
( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT;
req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect);
-#else
- req->frame_and_sects[0] = blkif_fas(buffer_ma, fsect, lsect);
-#endif
-
- /* Keep a private copy so we can reissue requests when recovering. */
+
+ /* Keep a private copy so we can reissue requests when recovering. */
pickle_request(&blk_shadow[xid], req);
- blk_ring.req_prod_pvt++;
-
+ info->ring.req_prod_pvt++;
+
return 0;
}
@@ -911,13 +823,13 @@
struct buffer_head *bh, *next_bh;
int rw, nsect, full, queued = 0;
- DPRINTK("Entered do_blkif_request\n");
+ DPRINTK("Entered do_blkif_request\n");
while ( !rq->plugged && !list_empty(&rq->queue_head))
{
- if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
+ if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
goto out;
-
+
DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
req, req->cmd, req->sector,
req->current_nr_sectors, req->nr_sectors, req->bh);
@@ -938,16 +850,16 @@
full = blkif_queue_request(
(unsigned long)bh,
- (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
+ (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
if ( full )
- {
+ {
bh->b_reqnext = next_bh;
pending_queues[nr_pending++] = rq;
if ( unlikely(nr_pending >= MAX_PENDING) )
BUG();
- goto out;
+ goto out;
}
queued++;
@@ -955,7 +867,7 @@
/* Dequeue the buffer head from the request. */
nsect = bh->b_size >> 9;
bh = req->bh = next_bh;
-
+
if ( bh != NULL )
{
/* There's another buffer head to do. Update the request. */
@@ -985,27 +897,27 @@
static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
- RING_IDX i, rp;
- unsigned long flags;
+ RING_IDX i, rp;
+ unsigned long flags;
struct buffer_head *bh, *next_bh;
-
- spin_lock_irqsave(&io_request_lock, flags);
-
- if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) )
+
+ spin_lock_irqsave(&io_request_lock, flags);
+
+ if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) )
{
spin_unlock_irqrestore(&io_request_lock, flags);
return;
}
- rp = blk_ring.sring->rsp_prod;
+ rp = info->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for ( i = blk_ring.rsp_cons; i != rp; i++ )
+ for ( i = info->ring.rsp_cons; i != rp; i++ )
{
unsigned long id;
blkif_response_t *bret;
-
- bret = RING_GET_RESPONSE(&blk_ring, i);
+
+ bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
bh = (struct buffer_head *)blk_shadow[id].request;
@@ -1037,8 +949,8 @@
}
}
- blk_ring.rsp_cons = i;
-
+ info->ring.rsp_cons = i;
+
kick_pending_request_queues();
spin_unlock_irqrestore(&io_request_lock, flags);
@@ -1048,157 +960,29 @@
/***************************** COMMON CODE *******************************/
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
- unsigned long address)
-{
- int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
- ASSERT( ref != -ENOSPC );
-
- gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
-
- req->frame_and_sects[0] = blkif_fas_from_gref(ref, 0, (PAGE_SIZE/512)-1);
-
- blkif_control_send(req, rsp);
-}
-#endif
-
-void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
-{
- unsigned long flags, id;
- blkif_request_t *req_d;
-
- retry:
- while ( RING_FULL(&blk_ring) )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- spin_lock_irqsave(&blkif_io_lock, flags);
- if ( RING_FULL(&blk_ring) )
- {
- spin_unlock_irqrestore(&blkif_io_lock, flags);
- goto retry;
- }
-
- DISABLE_SCATTERGATHER();
- req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
- *req_d = *req;
-
- id = GET_ID_FROM_FREELIST();
- req_d->id = id;
- blk_shadow[id].request = (unsigned long)req;
-
- pickle_request(&blk_shadow[id], req);
-
- blk_ring.req_prod_pvt++;
- flush_requests();
-
- spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- while ( !blkif_control_rsp_valid )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
- blkif_control_rsp_valid = 0;
-}
-
-
-/* Send a driver status notification to the domain controller. */
-static void send_driver_status(int ok)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
- .length = sizeof(blkif_fe_driver_status_t),
- };
- blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
-
- msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-/* Tell the controller to bring up the interface. */
-static void blkif_send_interface_connect(void)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
- .length = sizeof(blkif_fe_interface_connect_t),
- };
- blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-
- msg->handle = 0;
- msg->shmem_frame = (virt_to_machine(blk_ring.sring) >> PAGE_SHIFT);
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- msg->shmem_ref = gnttab_claim_grant_reference( &gref_head, gref_terminal );
- ASSERT( msg->shmem_ref != -ENOSPC );
- gnttab_grant_foreign_access_ref ( msg->shmem_ref , rdomid, msg->shmem_frame, 0 );
-#endif
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-static void blkif_free(void)
+static void blkif_free(struct blkfront_info *info)
{
/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&blkif_io_lock);
- recovery = 1;
- blkif_state = BLKIF_STATE_DISCONNECTED;
+ info->connected = BLKIF_STATE_DISCONNECTED;
spin_unlock_irq(&blkif_io_lock);
/* Free resources associated with old device channel. */
- if ( blk_ring.sring != NULL )
- {
- free_page((unsigned long)blk_ring.sring);
- blk_ring.sring = NULL;
- }
- free_irq(blkif_irq, NULL);
- blkif_irq = 0;
-
- unbind_evtchn_from_irq(blkif_evtchn);
- blkif_evtchn = 0;
-}
-
-static void blkif_close(void)
-{
-}
-
-/* Move from CLOSED to DISCONNECTED state. */
-static void blkif_disconnect(void)
-{
- blkif_sring_t *sring;
-
- if ( blk_ring.sring != NULL )
- free_page((unsigned long)blk_ring.sring);
-
- sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
- blkif_state = BLKIF_STATE_DISCONNECTED;
- blkif_send_interface_connect();
-}
-
-static void blkif_reset(void)
-{
- blkif_free();
- blkif_disconnect();
-}
-
-static void blkif_recover(void)
+ if ( info->ring.sring != NULL )
+ {
+ free_page((unsigned long)info->ring.sring);
+ info->ring.sring = NULL;
+ }
+ unbind_evtchn_from_irqhandler(info->evtchn, NULL);
+ info->evtchn = 0;
+}
+
+static void blkif_recover(struct blkfront_info *info)
{
int i;
blkif_request_t *req;
struct blk_shadow *copy;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
int j;
-#endif
/* Stage 1: Make a safe copy of the shadow state. */
copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL);
@@ -1209,7 +993,7 @@
memset(&blk_shadow, 0, sizeof(blk_shadow));
for ( i = 0; i < BLK_RING_SIZE; i++ )
blk_shadow[i].req.id = i+1;
- blk_shadow_free = blk_ring.req_prod_pvt;
+ blk_shadow_free = info->ring.req_prod_pvt;
blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
/* Stage 3: Find pending requests and requeue them. */
@@ -1221,195 +1005,339 @@
/* Grab a request slot and unpickle shadow state into it. */
req = RING_GET_REQUEST(
- &blk_ring, blk_ring.req_prod_pvt);
+ &info->ring, info->ring.req_prod_pvt);
unpickle_request(req, &copy[i]);
/* We get a new request id, and must reset the shadow state. */
req->id = GET_ID_FROM_FREELIST();
memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i]));
-#ifdef CONFIG_XEN_BLKDEV_GRANT
/* Rewrite any grant references invalidated by suspend/resume. */
for ( j = 0; j < req->nr_segments; j++ )
{
if ( req->frame_and_sects[j] & GRANTREF_INVALID )
gnttab_grant_foreign_access_ref(
blkif_gref_from_fas(req->frame_and_sects[j]),
- rdomid,
+ info->backend_id,
blk_shadow[req->id].frame[j],
rq_data_dir((struct request *)
blk_shadow[req->id].request));
req->frame_and_sects[j] &= ~GRANTREF_INVALID;
}
blk_shadow[req->id].req = *req;
-#endif
-
- blk_ring.req_prod_pvt++;
+
+ info->ring.req_prod_pvt++;
}
kfree(copy);
recovery = 0;
- /* blk_ring->req_prod will be set when we flush_requests().*/
+ /* info->ring->req_prod will be set when we flush_requests().*/
wmb();
/* Kicks things back into life. */
- flush_requests();
+ flush_requests(info);
/* Now safe to left other people use the interface. */
- blkif_state = BLKIF_STATE_CONNECTED;
-}
-
-static void blkif_connect(blkif_fe_interface_status_t *status)
+ info->connected = BLKIF_STATE_CONNECTED;
+}
+
+static void blkif_connect(struct blkfront_info *info, u16 evtchn)
{
int err = 0;
- blkif_evtchn = status->evtchn;
- blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
-
- err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
- if ( err )
- {
- WPRINTK("request_irq failed (err=%d)\n", err);
+ info->evtchn = evtchn;
+
+ err = bind_evtchn_to_irqhandler(
+ info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
+ if ( err != 0 )
+ {
+ WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
return;
}
-
- if ( recovery )
- {
- blkif_recover();
- }
- else
- {
- /* Transition to connected in case we need to do
- * a partition probe on a whole disk. */
- blkif_state = BLKIF_STATE_CONNECTED;
-
- /* Probe for discs attached to the interface. */
- xlvbd_init();
- }
-
- /* Kick pending requests. */
- spin_lock_irq(&blkif_io_lock);
- kick_pending_request_queues();
- spin_unlock_irq(&blkif_io_lock);
-}
-
-static void unexpected(blkif_fe_interface_status_t *status)
-{
- DPRINTK(" Unexpected blkif status %u in state %u\n",
- status->status, blkif_state);
-}
-
-static void blkif_status(blkif_fe_interface_status_t *status)
-{
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- rdomid = status->domid; /* need to set rdomid early */
-#endif
-
- if ( status->handle != blkif_handle )
- {
- WPRINTK(" Invalid blkif: handle=%u\n", status->handle);
- unexpected(status);
- return;
- }
-
- switch ( status->status )
- {
- case BLKIF_INTERFACE_STATUS_CLOSED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_close();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_DISCONNECTED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- blkif_disconnect();
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- /* unexpected(status); */ /* occurs during suspend/resume */
- blkif_reset();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CONNECTED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- blkif_disconnect();
- blkif_connect(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- blkif_connect(status);
- break;
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_connect(status);
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CHANGED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- case BLKIF_STATE_DISCONNECTED:
- unexpected(status);
- break;
- case BLKIF_STATE_CONNECTED:
- vbd_update();
- break;
- }
- break;
-
- default:
- WPRINTK(" Invalid blkif status: %d\n", status->status);
- break;
- }
-}
-
-
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch ( msg->subtype )
- {
- case CMSG_BLKIF_FE_INTERFACE_STATUS:
- blkif_status((blkif_fe_interface_status_t *)
- &msg->msg[0]);
- break;
- default:
- msg->length = 0;
- break;
- }
-
- ctrl_if_send_response(msg);
-}
-
-int wait_for_blkif(void)
+}
+
+
+static struct xenbus_device_id blkfront_ids[] = {
+ { "vbd" },
+ { "" }
+};
+
+static void watch_for_status(struct xenbus_watch *watch, const char *node)
+{
+ struct blkfront_info *info;
+ unsigned int binfo;
+ unsigned long sectors, sector_size;
+ int err;
+
+ info = container_of(watch, struct blkfront_info, watch);
+ node += strlen(watch->node);
+
+ /* FIXME: clean up when error on the other end. */
+ if (info->connected == BLKIF_STATE_CONNECTED)
+ return;
+
+ err = xenbus_gather(watch->node,
+ "sectors", "%lu", &sectors,
+ "info", "%u", &binfo,
+ "sector-size", "%lu", &sector_size,
+ NULL);
+ if (err) {
+ xenbus_dev_error(info->xbdev, err,
+ "reading backend fields at %s", watch->node);
+ return;
+ }
+
+ xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+ info->connected = BLKIF_STATE_CONNECTED;
+
+ blkif_state = BLKIF_STATE_CONNECTED;
+
+ xenbus_dev_ok(info->xbdev);
+
+ /* Kick pending requests. */
+ spin_lock_irq(&blkif_io_lock);
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
+}
+
+static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
+{
+ blkif_sring_t *sring;
+ evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
+ int err;
+
+ sring = (void *)__get_free_page(GFP_KERNEL);
+ if (!sring) {
+ xenbus_dev_error(dev, -ENOMEM, "allocating shared ring");
+ return -ENOMEM;
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+ err = gnttab_grant_foreign_access(info->backend_id,
+ virt_to_mfn(info->ring.sring), 0);
+ if (err == -ENOSPC) {
+ free_page((unsigned long)info->ring.sring);
+ info->ring.sring = 0;
+ xenbus_dev_error(dev, err, "granting access to ring page");
+ return err;
+ }
+ info->ring_ref = err;
+
+ op.u.alloc_unbound.dom = info->backend_id;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ gnttab_end_foreign_access(info->ring_ref, 0);
+ free_page((unsigned long)info->ring.sring);
+ info->ring.sring = 0;
+ xenbus_dev_error(dev, err, "allocating event channel");
+ return err;
+ }
+ blkif_connect(info, op.u.alloc_unbound.port);
+ return 0;
+}
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+ struct blkfront_info *info)
+{
+ char *backend;
+ const char *message;
+ int err;
+
+ backend = NULL;
+ err = xenbus_gather(dev->nodename,
+ "backend-id", "%i", &info->backend_id,
+ "backend", NULL, &backend,
+ NULL);
+ if (XENBUS_EXIST_ERR(err))
+ goto out;
+ if (backend && strlen(backend) == 0) {
+ err = -ENOENT;
+ goto out;
+ }
+ if (err < 0) {
+ xenbus_dev_error(dev, err, "reading %s/backend or backend-id",
+ dev->nodename);
+ goto out;
+ }
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_blkring(dev, info);
+ if (err) {
+ xenbus_dev_error(dev, err, "setting up block ring");
+ goto out;
+ }
+
+ err = xenbus_transaction_start(dev->nodename);
+ if (err) {
+ xenbus_dev_error(dev, err, "starting transaction");
+ goto destroy_blkring;
+ }
+
+ err = xenbus_printf(dev->nodename, "ring-ref","%u", info->ring_ref);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(dev->nodename,
+ "event-channel", "%u", info->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+
+ info->backend = backend;
+ backend = NULL;
+
+ info->watch.node = info->backend;
+ info->watch.callback = watch_for_status;
+ err = register_xenbus_watch(&info->watch);
+ if (err) {
+ message = "registering watch on backend";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(0);
+ if (err) {
+ xenbus_dev_error(dev, err, "completing transaction");
+ goto destroy_blkring;
+ }
+
+ out:
+ if (backend)
+ kfree(backend);
+ return err;
+
+ abort_transaction:
+ xenbus_transaction_end(1);
+ /* Have to do this *outside* transaction. */
+ xenbus_dev_error(dev, err, "%s", message);
+ destroy_blkring:
+ blkif_free(info);
+ goto out;
+}
+
+/* Setup supplies the backend dir, virtual device.
+
+ We place an event channel and shared frame entries.
+ We watch backend to wait if it's ok. */
+static int blkfront_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err;
+ struct blkfront_info *info;
+ int vdevice;
+
+ /* FIXME: Use dynamic device id if this is not set. */
+ err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice);
+ if (XENBUS_EXIST_ERR(err))
+ return err;
+ if (err < 0) {
+ xenbus_dev_error(dev, err, "reading virtual-device");
+ return err;
+ }
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ xenbus_dev_error(dev, err, "allocating info structure");
+ return err;
+ }
+ info->xbdev = dev;
+ info->vdevice = vdevice;
+ info->connected = BLKIF_STATE_DISCONNECTED;
+ info->mi = NULL;
+ INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
+
+ /* Front end dir is a number, which is used as the id. */
+ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
+ dev->data = info;
+
+ err = talk_to_backend(dev, info);
+ if (err) {
+ kfree(info);
+ dev->data = NULL;
+ return err;
+ }
+
+ /* Call once in case entries already there. */
+ watch_for_status(&info->watch, info->watch.node);
+ return 0;
+}
+
+static int blkfront_remove(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->data;
+
+ if (info->backend)
+ unregister_xenbus_watch(&info->watch);
+
+ if (info->mi)
+ xlvbd_del(info);
+
+ blkif_free(info);
+
+ kfree(info->backend);
+ kfree(info);
+
+ return 0;
+}
+
+static int blkfront_suspend(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->data;
+
+ unregister_xenbus_watch(&info->watch);
+ kfree(info->backend);
+ info->backend = NULL;
+
+ recovery = 1;
+ blkif_free(info);
+
+ return 0;
+}
+
+static int blkfront_resume(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->data;
+ int err;
+
+ /* FIXME: Check geometry hasn't changed here... */
+ err = talk_to_backend(dev, info);
+ if (!err) {
+ blkif_recover(info);
+ }
+ return err;
+}
+
+static struct xenbus_driver blkfront = {
+ .name = "vbd",
+ .owner = THIS_MODULE,
+ .ids = blkfront_ids,
+ .probe = blkfront_probe,
+ .remove = blkfront_remove,
+ .resume = blkfront_resume,
+ .suspend = blkfront_suspend,
+};
+
+static void __init init_blk_xenbus(void)
+{
+ xenbus_register_device(&blkfront);
+}
+
+static int wait_for_blkif(void)
{
int err = 0;
int i;
- send_driver_status(1);
/*
- * We should read 'nr_interfaces' from response message and wait
- * for notifications before proceeding. For now we assume that we
- * will be notified of exactly one interface.
+ * We should figure out how many and which devices we need to
+ * proceed and only wait for those. For now, continue once the
+ * first device is around.
*/
- for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
+ for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ )
{
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
@@ -1423,17 +1351,9 @@
return err;
}
-int __init xlblk_init(void)
+static int __init xlblk_init(void)
{
int i;
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- /* A grant for every ring slot, plus one for the ring itself. */
- if ( 0 > gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1,
- &gref_head, &gref_terminal) )
- return 1;
- printk(KERN_ALERT "Blkif frontend is using grant tables.\n");
-#endif
if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
@@ -1447,46 +1367,17 @@
blk_shadow[i].req.id = i+1;
blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
- (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
+ init_blk_xenbus();
wait_for_blkif();
return 0;
}
-void blkdev_suspend(void)
-{
-}
-
-void blkdev_resume(void)
-{
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- int i, j;
- for ( i = 0; i < BLK_RING_SIZE; i++ )
- for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ )
- blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID;
-#endif
- send_driver_status(1);
-}
-
static void blkif_completion(struct blk_shadow *s)
{
int i;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
for ( i = 0; i < s->req.nr_segments; i++ )
- gnttab_release_grant_reference(
- &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i]));
-#else
- /* This is a hack to get the dirty logging bits set */
- if ( s->req.operation == BLKIF_OP_READ )
- {
- for ( i = 0; i < s->req.nr_segments; i++ )
- {
- unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT;
- unsigned long mfn = phys_to_machine_mapping[pfn];
- xen_machphys_update(mfn, pfn);
- }
- }
-#endif
-}
+ gnttab_free_grant_reference(
+ blkif_gref_from_fas(s->req.frame_and_sects[i]));
+}
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Aug 25 22:53:20 2005
@@ -33,6 +33,7 @@
#define __XEN_DRIVERS_BLOCK_H__
#include <linux/config.h>
+#include <linux/version.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -44,6 +45,8 @@
#include <linux/blkdev.h>
#include <linux/major.h>
#include <linux/devfs_fs_kernel.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xenbus.h>
#include <asm-xen/xen-public/xen.h>
#include <asm-xen/xen-public/io/blkif.h>
#include <asm-xen/xen-public/io/ring.h>
@@ -77,11 +80,20 @@
#define DPRINTK_IOCTL(_f, _a...) ((void)0)
#endif
-struct xlbd_type_info {
- int partn_shift;
- int disks_per_major;
- char *devname;
- char *diskname;
+/* Naming and minor-number layout shared by all majors of one device type. */
+struct xlbd_type_info
+{
+ int partn_shift; /* a whole disk spans (1 << partn_shift) minors */
+ int disks_per_major; /* used when computing the disk-name letter */
+ char *devname; /* name passed to register_blkdev()/devfs_mk_dir() */
+ char *diskname; /* prefix of the gendisk name, e.g. "hd" */
+};
+
+/* Book-keeping for one registered block major. */
+struct xlbd_major_info
+{
+ int major; /* major number handed to register_blkdev() */
+ int index; /* position of this major within its type's range */
+ int usage; /* use count: bumped by get, dropped by put */
+ struct xlbd_type_info *type; /* ide / scsi / xvd description */
};
/*
@@ -89,25 +101,27 @@
* hang in private_data off the gendisk structure. We may end up
* putting all kinds of interesting stuff here :-)
*/
-struct xlbd_major_info {
- int major;
- int index;
- int usage;
- struct xlbd_type_info *type;
+/* Per-device state of one block frontend instance. */
+struct blkfront_info
+{
+ struct xenbus_device *xbdev; /* owning xenbus device, set in probe */
+ /* We watch the backend */
+ struct xenbus_watch watch;
+ dev_t dev; /* MKDEV(major, minor) derived from vdevice in xlvbd_add() */
+ int vdevice; /* value of the "virtual-device" node */
+ blkif_vdev_t handle; /* number taken from the last nodename component */
+ int connected; /* one of the BLKIF_STATE_* values */
+ char *backend; /* kfree()d on remove/suspend; NULL after suspend */
+ int backend_id;
+ int ring_ref;
+ blkif_front_ring_t ring;
+ unsigned int evtchn;
+ struct xlbd_major_info *mi; /* non-NULL while a gendisk exists */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ request_queue_t *rq; /* the gendisk's request queue */
+#endif
+ struct work_struct work; /* runs blkif_restart_queue() */
+ struct gnttab_free_callback callback;
};
-
-struct xlbd_disk_info {
- int xd_device;
- struct xlbd_major_info *mi;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- struct xlbd_disk_info *next_waiting;
- request_queue_t *rq;
-#endif
-};
-
-typedef struct xen_block {
- int usage;
-} xen_block_t;
extern spinlock_t blkif_io_lock;
@@ -117,17 +131,10 @@
unsigned command, unsigned long argument);
extern int blkif_check(dev_t dev);
extern int blkif_revalidate(dev_t dev);
-extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-extern void blkif_control_probe_send(
- blkif_request_t *req, blkif_response_t *rsp, unsigned long address);
-#endif
extern void do_blkif_request (request_queue_t *rq);
-extern void xlvbd_update_vbds(void);
-
/* Virtual block-device subsystem. */
-extern int xlvbd_init(void);
-extern void xlvbd_cleanup(void);
-
+int xlvbd_add(blkif_sector_t capacity, int device,
+ u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
+void xlvbd_del(struct blkfront_info *info);
#endif /* __XEN_DRIVERS_BLOCK_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Thu Aug 25 22:53:20 2005
@@ -43,458 +43,269 @@
#define NUM_SCSI_MAJORS 9
#define NUM_VBD_MAJORS 1
-struct lvdisk
-{
- blkif_sector_t capacity; /* 0: Size in terms of 512-byte sectors. */
- blkif_vdev_t device; /* 8: Device number (opaque 16 bit value). */
- u16 info;
- struct list_head list;
+/* IDE-style devices: 64 minors per disk, 2 disks per major, named hdX. */
+static struct xlbd_type_info xlbd_ide_type = {
+ .partn_shift = 6,
+ .disks_per_major = 2,
+ .devname = "ide",
+ .diskname = "hd",
};
-static struct xlbd_type_info xlbd_ide_type = {
- .partn_shift = 6,
- .disks_per_major = 2,
- .devname = "ide",
- .diskname = "hd",
+/* SCSI-style devices: 16 minors per disk, 16 disks per major, named sdX. */
+static struct xlbd_type_info xlbd_scsi_type = {
+ .partn_shift = 4,
+ .disks_per_major = 16,
+ .devname = "sd",
+ .diskname = "sd",
};
-static struct xlbd_type_info xlbd_scsi_type = {
- .partn_shift = 4,
- .disks_per_major = 16,
- .devname = "sd",
- .diskname = "sd",
+/* Xen virtual disks: 16 minors per disk, 16 disks per major, named xvdX. */
+static struct xlbd_type_info xlbd_vbd_type = {
+ .partn_shift = 4,
+ .disks_per_major = 16,
+ .devname = "xvd",
+ .diskname = "xvd",
};
-static struct xlbd_type_info xlbd_vbd_type = {
- .partn_shift = 4,
- .disks_per_major = 16,
- .devname = "xvd",
- .diskname = "xvd",
-};
-
static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
- NUM_VBD_MAJORS];
-
-#define XLBD_MAJOR_IDE_START 0
-#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS)
-#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
-
-#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START
- 1
-#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START
- 1
-#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START
+ NUM_VBD_MAJORS - 1
+ NUM_VBD_MAJORS];
+
+#define XLBD_MAJOR_IDE_START 0
+#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS)
+#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
+
+#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START
- 1
+#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START
- 1
+#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START +
NUM_VBD_MAJORS - 1
/* Information about our VBDs. */
#define MAX_VBDS 64
-struct list_head vbds_list;
-
-#define MAJOR_XEN(dev) ((dev)>>8)
-#define MINOR_XEN(dev) ((dev) & 0xff)
-
-static struct block_device_operations xlvbd_block_fops =
-{
- .owner = THIS_MODULE,
- .open = blkif_open,
- .release = blkif_release,
- .ioctl = blkif_ioctl,
+static LIST_HEAD(vbds_list);
+
+/* Block device operations installed as ->fops on every gendisk we create. */
+static struct block_device_operations xlvbd_block_fops =
+{
+ .owner = THIS_MODULE,
+ .open = blkif_open,
+ .release = blkif_release,
+ .ioctl = blkif_ioctl,
};
spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
-static struct lvdisk *xlvbd_device_alloc(void)
-{
- struct lvdisk *disk;
-
- disk = kmalloc(sizeof(*disk), GFP_KERNEL);
- if (disk != NULL) {
- memset(disk, 0, sizeof(*disk));
- INIT_LIST_HEAD(&disk->list);
- }
- return disk;
-}
-
-static void xlvbd_device_free(struct lvdisk *disk)
-{
- list_del(&disk->list);
- kfree(disk);
-}
-
-static vdisk_t *xlvbd_probe(int *ret)
-{
- blkif_response_t rsp;
- blkif_request_t req;
- vdisk_t *disk_info = NULL;
- unsigned long buf;
- int nr;
-
- buf = __get_free_page(GFP_KERNEL);
- if ((void *)buf == NULL)
- goto out;
-
- memset(&req, 0, sizeof(req));
- req.operation = BLKIF_OP_PROBE;
- req.nr_segments = 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- blkif_control_probe_send(&req, &rsp,
- (unsigned long)(virt_to_machine(buf)));
-#else
- req.frame_and_sects[0] = blkif_fas(virt_to_machine(buf), 0,
((PAGE_SIZE/512)-1);
-
- blkif_control_send(&req, &rsp);
-#endif
- if ( rsp.status <= 0 ) {
- WPRINTK("Could not probe disks (%d)\n", rsp.status);
- goto out;
- }
- nr = rsp.status;
- if ( nr > MAX_VBDS )
- nr = MAX_VBDS;
-
- disk_info = kmalloc(nr * sizeof(vdisk_t), GFP_KERNEL);
- if (disk_info != NULL)
- memcpy(disk_info, (void *) buf, nr * sizeof(vdisk_t));
-
- if (ret != NULL)
- *ret = nr;
-
-out:
- free_page(buf);
- return disk_info;
-}
-
-static struct xlbd_major_info *xlbd_alloc_major_info(
- int major, int minor, int index)
-{
- struct xlbd_major_info *ptr;
-
- ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
- if (ptr == NULL)
- return NULL;
-
- memset(ptr, 0, sizeof(struct xlbd_major_info));
-
- ptr->major = major;
-
- switch (index) {
- case XLBD_MAJOR_IDE_RANGE:
- ptr->type = &xlbd_ide_type;
- ptr->index = index - XLBD_MAJOR_IDE_START;
- break;
- case XLBD_MAJOR_SCSI_RANGE:
- ptr->type = &xlbd_scsi_type;
- ptr->index = index - XLBD_MAJOR_SCSI_START;
- break;
- case XLBD_MAJOR_VBD_RANGE:
- ptr->type = &xlbd_vbd_type;
- ptr->index = index - XLBD_MAJOR_VBD_START;
- break;
- }
-
- if (register_blkdev(ptr->major, ptr->type->devname)) {
- WPRINTK("can't get major %d with name %s\n",
- ptr->major, ptr->type->devname);
- kfree(ptr);
- return NULL;
- }
-
- devfs_mk_dir(ptr->type->devname);
- major_info[index] = ptr;
- return ptr;
-}
-
-static struct xlbd_major_info *xlbd_get_major_info(int device)
-{
- int major, minor, index;
-
- major = MAJOR_XEN(device);
- minor = MINOR_XEN(device);
-
- switch (major) {
- case IDE0_MAJOR: index = 0; break;
- case IDE1_MAJOR: index = 1; break;
- case IDE2_MAJOR: index = 2; break;
- case IDE3_MAJOR: index = 3; break;
- case IDE4_MAJOR: index = 4; break;
- case IDE5_MAJOR: index = 5; break;
- case IDE6_MAJOR: index = 6; break;
- case IDE7_MAJOR: index = 7; break;
- case IDE8_MAJOR: index = 8; break;
- case IDE9_MAJOR: index = 9; break;
- case SCSI_DISK0_MAJOR: index = 10; break;
- case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
- index = 11 + major - SCSI_DISK1_MAJOR;
- break;
- case SCSI_CDROM_MAJOR: index = 18; break;
- default: index = 19; break;
- }
-
- return ((major_info[index] != NULL) ? major_info[index] :
- xlbd_alloc_major_info(major, minor, index));
-}
-
-static int xlvbd_init_blk_queue(struct gendisk *gd, vdisk_t *disk)
-{
- request_queue_t *rq;
-
- rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
- if (rq == NULL)
- return -1;
-
- elevator_init(rq, "noop");
-
- /* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_hardsect_size(rq, disk->sector_size);
- blk_queue_max_sectors(rq, 512);
-
- /* Each segment in a request is up to an aligned page in size. */
- blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
- blk_queue_max_segment_size(rq, PAGE_SIZE);
-
- /* Ensure a merged request will fit in a single I/O ring slot. */
- blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
- blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
- /* Make sure buffer addresses are sector-aligned. */
- blk_queue_dma_alignment(rq, 511);
-
- gd->queue = rq;
-
- return 0;
-}
-
-struct gendisk *xlvbd_alloc_gendisk(
- struct xlbd_major_info *mi, int minor, vdisk_t *disk)
-{
- struct gendisk *gd;
- struct xlbd_disk_info *di;
- int nr_minors = 1;
-
- di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
- if (di == NULL)
- return NULL;
- memset(di, 0, sizeof(*di));
- di->mi = mi;
- di->xd_device = disk->device;
-
- if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
- nr_minors = 1 << mi->type->partn_shift;
-
- gd = alloc_disk(nr_minors);
- if (gd == NULL)
- goto out;
-
- if (nr_minors > 1)
- sprintf(gd->disk_name, "%s%c", mi->type->diskname,
- 'a' + mi->index * mi->type->disks_per_major +
- (minor >> mi->type->partn_shift));
- else
- sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
- 'a' + mi->index * mi->type->disks_per_major +
- (minor >> mi->type->partn_shift),
- minor & ((1 << mi->type->partn_shift) - 1));
-
- gd->major = mi->major;
- gd->first_minor = minor;
- gd->fops = &xlvbd_block_fops;
- gd->private_data = di;
- set_capacity(gd, disk->capacity);
-
- if (xlvbd_init_blk_queue(gd, disk)) {
- del_gendisk(gd);
- goto out;
- }
-
- di->rq = gd->queue;
-
- if (disk->info & VDISK_READONLY)
- set_disk_ro(gd, 1);
-
- if (disk->info & VDISK_REMOVABLE)
- gd->flags |= GENHD_FL_REMOVABLE;
-
- if (disk->info & VDISK_CDROM)
- gd->flags |= GENHD_FL_CD;
-
- add_disk(gd);
-
- return gd;
-
-out:
- kfree(di);
- return NULL;
-}
-
-static int xlvbd_device_add(struct list_head *list, vdisk_t *disk)
-{
- struct lvdisk *new;
- int minor;
- dev_t device;
- struct block_device *bd;
- struct gendisk *gd;
- struct xlbd_major_info *mi;
-
- mi = xlbd_get_major_info(disk->device);
- if (mi == NULL)
- return -EPERM;
-
- new = xlvbd_device_alloc();
- if (new == NULL)
- return -1;
- new->capacity = disk->capacity;
- new->device = disk->device;
- new->info = disk->info;
-
- minor = MINOR_XEN(disk->device);
- device = MKDEV(mi->major, minor);
-
- bd = bdget(device);
- if (bd == NULL)
- goto out;
-
- gd = xlvbd_alloc_gendisk(mi, minor, disk);
- if (gd == NULL)
- goto out_bd;
-
- list_add(&new->list, list);
-out_bd:
- bdput(bd);
-out:
- return 0;
-}
-
-static int xlvbd_device_del(struct lvdisk *disk)
-{
- dev_t device;
- struct block_device *bd;
- struct gendisk *gd;
- struct xlbd_disk_info *di;
- int ret = 0, unused;
- request_queue_t *rq;
-
- device = MKDEV(MAJOR_XEN(disk->device), MINOR_XEN(disk->device));
-
- bd = bdget(device);
- if (bd == NULL)
- return -1;
-
- gd = get_gendisk(device, &unused);
- di = gd->private_data;
-
- if (di->mi->usage != 0) {
- WPRINTK("disk removal failed: used [dev=%x]\n", device);
- ret = -1;
- goto out;
- }
-
- rq = gd->queue;
- del_gendisk(gd);
- put_disk(gd);
- blk_cleanup_queue(rq);
-
- xlvbd_device_free(disk);
-out:
- bdput(bd);
- return ret;
-}
-
-static int xlvbd_device_update(struct lvdisk *ldisk, vdisk_t *disk)
-{
- dev_t device;
- struct block_device *bd;
- struct gendisk *gd;
- int unused;
-
- if ((ldisk->capacity == disk->capacity) && (ldisk->info == disk->info))
- return 0;
-
- device = MKDEV(MAJOR_XEN(ldisk->device), MINOR_XEN(ldisk->device));
-
- bd = bdget(device);
- if (bd == NULL)
- return -1;
-
- gd = get_gendisk(device, &unused);
- set_capacity(gd, disk->capacity);
- ldisk->capacity = disk->capacity;
-
- bdput(bd);
-
- return 0;
-}
-
-void xlvbd_refresh(void)
-{
- vdisk_t *newdisks;
- struct list_head *tmp, *tmp2;
- struct lvdisk *disk;
- int i, nr;
-
- newdisks = xlvbd_probe(&nr);
- if (newdisks == NULL) {
- WPRINTK("failed to probe\n");
- return;
- }
-
- i = 0;
- list_for_each_safe(tmp, tmp2, &vbds_list) {
- disk = list_entry(tmp, struct lvdisk, list);
-
- for (i = 0; i < nr; i++) {
- if ( !newdisks[i].device )
- continue;
- if ( disk->device == newdisks[i].device ) {
- xlvbd_device_update(disk, &newdisks[i]);
- newdisks[i].device = 0;
- break;
- }
- }
- if (i == nr) {
- xlvbd_device_del(disk);
- newdisks[i].device = 0;
- }
- }
- for (i = 0; i < nr; i++)
- if ( newdisks[i].device )
- xlvbd_device_add(&vbds_list, &newdisks[i]);
- kfree(newdisks);
-}
-
-/*
- * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
- * state. The VBDs need to be updated in this way when the domain is
- * initialised and also each time we receive an XLBLK_UPDATE event.
- */
-void xlvbd_update_vbds(void)
-{
- xlvbd_refresh();
-}
-
-/*
- * Set up all the linux device goop for the virtual block devices
- * (vbd's) that we know about. Note that although from the backend
- * driver's p.o.v. VBDs are addressed simply an opaque 16-bit device
- * number, the domain creation tools conventionally allocate these
- * numbers to correspond to those used by 'real' linux -- this is just
- * for convenience as it means e.g. that the same /etc/fstab can be
- * used when booting with or without Xen.
- */
-int xlvbd_init(void)
-{
- int i, nr;
- vdisk_t *disks;
-
- INIT_LIST_HEAD(&vbds_list);
-
- memset(major_info, 0, sizeof(major_info));
-
- disks = xlvbd_probe(&nr);
- if (disks == NULL) {
- WPRINTK("failed to probe\n");
- return -1;
- }
-
- for (i = 0; i < nr; i++)
- xlvbd_device_add(&vbds_list, &disks[i]);
-
- kfree(disks);
- return 0;
-}
+/*
+ * Create the book-keeping entry for one block major and register the
+ * major with the block layer.
+ *
+ * 'index' selects the slot in major_info[] and, through the
+ * XLBD_MAJOR_*_RANGE cases, the device type (ide / scsi / xvd).
+ * Returns the new entry, or NULL if the allocation or register_blkdev()
+ * failed.
+ */
+static struct xlbd_major_info *
+xlbd_alloc_major_info(int major, int minor, int index)
+{
+    struct xlbd_major_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
+
+    if (!mi)
+        return NULL;
+
+    memset(mi, 0, sizeof(*mi));
+    mi->major = major;
+
+    /* Pick the type and the position of this major within that type. */
+    switch (index) {
+    case XLBD_MAJOR_IDE_RANGE:
+        mi->index = index - XLBD_MAJOR_IDE_START;
+        mi->type = &xlbd_ide_type;
+        break;
+    case XLBD_MAJOR_SCSI_RANGE:
+        mi->index = index - XLBD_MAJOR_SCSI_START;
+        mi->type = &xlbd_scsi_type;
+        break;
+    case XLBD_MAJOR_VBD_RANGE:
+        mi->index = index - XLBD_MAJOR_VBD_START;
+        mi->type = &xlbd_vbd_type;
+        break;
+    }
+
+    printk("Registering block device major %i\n", mi->major);
+    if (register_blkdev(mi->major, mi->type->devname) != 0) {
+        WPRINTK("can't get major %d with name %s\n",
+                mi->major, mi->type->devname);
+        kfree(mi);
+        return NULL;
+    }
+
+    devfs_mk_dir(mi->type->devname);
+    major_info[index] = mi;
+    return mi;
+}
+
+/*
+ * Look up (allocating and registering on first use) the major-info entry
+ * for the major encoded in 'vdevice', and take a usage reference on it.
+ *
+ * Returns the entry, or NULL if a new entry was needed and
+ * xlbd_alloc_major_info() failed.  Drop the reference with
+ * xlbd_put_major_info().
+ */
+static struct xlbd_major_info *
+xlbd_get_major_info(int vdevice)
+{
+    struct xlbd_major_info *mi;
+    int major, minor, index;
+
+    major = BLKIF_MAJOR(vdevice);
+    minor = BLKIF_MINOR(vdevice);
+
+    /* Map the Linux major number onto a slot in major_info[]. */
+    switch (major) {
+    case IDE0_MAJOR: index = 0; break;
+    case IDE1_MAJOR: index = 1; break;
+    case IDE2_MAJOR: index = 2; break;
+    case IDE3_MAJOR: index = 3; break;
+    case IDE4_MAJOR: index = 4; break;
+    case IDE5_MAJOR: index = 5; break;
+    case IDE6_MAJOR: index = 6; break;
+    case IDE7_MAJOR: index = 7; break;
+    case IDE8_MAJOR: index = 8; break;
+    case IDE9_MAJOR: index = 9; break;
+    case SCSI_DISK0_MAJOR: index = 10; break;
+    case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
+        index = 11 + major - SCSI_DISK1_MAJOR;
+        break;
+    case SCSI_CDROM_MAJOR: index = 18; break;
+    default: index = 19; break;
+    }
+
+    mi = major_info[index];
+    if (mi == NULL)
+        mi = xlbd_alloc_major_info(major, minor, index);
+
+    /* Allocation may have failed; only count a reference we really hold. */
+    if (mi != NULL)
+        mi->usage++;
+    return mi;
+}
+
+/*
+ * Drop one usage reference taken by xlbd_get_major_info().  The major
+ * itself is not unregistered yet, even when the count reaches zero.
+ */
+static void
+xlbd_put_major_info(struct xlbd_major_info *mi)
+{
+ mi->usage--;
+ /* XXX: release major if 0 */
+}
+
+/*
+ * Create the request queue for a new gendisk and apply the limits the
+ * block interface needs.  On success the queue is stored in gd->queue
+ * and 0 is returned; -1 is returned if the queue cannot be created.
+ */
+static int
+xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+{
+    request_queue_t *queue = blk_init_queue(do_blkif_request, &blkif_io_lock);
+
+    if (!queue)
+        return -1;
+
+    elevator_init(queue, "noop");
+
+    /* Present the same hard sector size / max sectors as real hardware. */
+    blk_queue_hardsect_size(queue, sector_size);
+    blk_queue_max_sectors(queue, 512);
+
+    /* A segment never crosses a page and is at most one page long. */
+    blk_queue_segment_boundary(queue, PAGE_SIZE - 1);
+    blk_queue_max_segment_size(queue, PAGE_SIZE);
+
+    /* A merged request must still fit into one I/O ring slot. */
+    blk_queue_max_phys_segments(queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+    blk_queue_max_hw_segments(queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+    /* Buffer addresses have to be sector-aligned. */
+    blk_queue_dma_alignment(queue, 511);
+
+    gd->queue = queue;
+    return 0;
+}
+
+/*
+ * Allocate, configure and register the gendisk for one virtual device.
+ *
+ * minor       - first minor of the disk; a minor with no partition bits
+ *               set denotes a whole disk and gets a full range of minors
+ * capacity    - size handed to set_capacity()
+ * vdevice     - device number used to look up the major
+ * vdisk_info  - VDISK_* flags (read-only, removable, cdrom)
+ * sector_size - hard sector size for the request queue
+ * info        - frontend state; ->mi and ->rq are filled in on success
+ *
+ * Returns 0 on success or -ENODEV on failure.  On failure the major
+ * reference is dropped again and info->mi is left NULL, so that later
+ * teardown does not try to remove a disk that was never added.
+ */
+static int
+xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
+                    u16 vdisk_info, u16 sector_size,
+                    struct blkfront_info *info)
+{
+    struct gendisk *gd;
+    struct xlbd_major_info *mi;
+    int nr_minors = 1;
+    int err = -ENODEV;
+
+    mi = xlbd_get_major_info(vdevice);
+    if (mi == NULL)
+        goto out;
+    /* Set before add_disk(): the disk may be opened as soon as it is added. */
+    info->mi = mi;
+
+    if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
+        nr_minors = 1 << mi->type->partn_shift;
+
+    gd = alloc_disk(nr_minors);
+    if (gd == NULL)
+        goto out;
+
+    if (nr_minors > 1)
+        sprintf(gd->disk_name, "%s%c", mi->type->diskname,
+                'a' + mi->index * mi->type->disks_per_major +
+                (minor >> mi->type->partn_shift));
+    else
+        sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
+                'a' + mi->index * mi->type->disks_per_major +
+                (minor >> mi->type->partn_shift),
+                minor & ((1 << mi->type->partn_shift) - 1));
+
+    gd->major = mi->major;
+    gd->first_minor = minor;
+    gd->fops = &xlvbd_block_fops;
+    gd->private_data = info;
+    set_capacity(gd, capacity);
+
+    if (xlvbd_init_blk_queue(gd, sector_size)) {
+        /*
+         * The disk was never add_disk()ed, so del_gendisk() is wrong
+         * here; just drop the reference obtained from alloc_disk().
+         */
+        put_disk(gd);
+        goto out;
+    }
+
+    info->rq = gd->queue;
+
+    if (vdisk_info & VDISK_READONLY)
+        set_disk_ro(gd, 1);
+
+    if (vdisk_info & VDISK_REMOVABLE)
+        gd->flags |= GENHD_FL_REMOVABLE;
+
+    if (vdisk_info & VDISK_CDROM)
+        gd->flags |= GENHD_FL_CD;
+
+    add_disk(gd);
+
+    return 0;
+
+ out:
+    if (mi)
+        xlbd_put_major_info(mi);
+    /* Do not leave a stale pointer that would make remove call xlvbd_del(). */
+    info->mi = NULL;
+    return err;
+}
+
+/*
+ * Create the Linux block device for a virtual device.
+ *
+ * Records the device number in info->dev, holds a block_device reference
+ * while the gendisk is being set up, and returns the result of
+ * xlvbd_alloc_gendisk() (or -ENODEV if bdget() fails).
+ */
+int
+xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
+          u16 sector_size, struct blkfront_info *info)
+{
+    struct block_device *bdev;
+    int rc;
+
+    info->dev = MKDEV(BLKIF_MAJOR(vdevice), BLKIF_MINOR(vdevice));
+
+    bdev = bdget(info->dev);
+    if (!bdev)
+        return -ENODEV;
+
+    rc = xlvbd_alloc_gendisk(BLKIF_MINOR(vdevice), capacity, vdevice,
+                             vdisk_info, sector_size, info);
+    bdput(bdev);
+
+    return rc;
+}
+
+/*
+ * Remove the gendisk that belongs to 'info': unregister the disk, drop
+ * the major reference, clear info->mi and clean up the request queue.
+ * Does nothing if the block device or the gendisk cannot be found.
+ */
+void
+xlvbd_del(struct blkfront_info *info)
+{
+    struct block_device *bd;
+    struct gendisk *gd;
+    int unused;
+    request_queue_t *rq;
+
+    bd = bdget(info->dev);
+    if (bd == NULL)
+        return;
+
+    /* The lookup can fail; never dereference a missing disk. */
+    gd = get_gendisk(info->dev, &unused);
+    if (gd == NULL)
+        goto out;
+
+    /* Remember the queue: it is cleaned up after the disk is gone. */
+    rq = gd->queue;
+
+    del_gendisk(gd);
+    put_disk(gd);
+    xlbd_put_major_info(info->mi);
+    info->mi = NULL;
+    blk_cleanup_queue(rq);
+
+ out:
+    bdput(bd);
+}
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Thu Aug 25 22:53:20 2005
@@ -23,6 +23,9 @@
blkif_be_driver_status_t be_st;
printk(KERN_INFO "Initialising Xen block tap device\n");
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ printk(KERN_INFO "Block tap is using grant tables.\n");
+#endif
DPRINTK(" tap - Backend connection init:\n");
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Thu Aug 25 22:53:20 2005
@@ -71,7 +71,6 @@
/* Physical parameters of the comms window. */
unsigned long shmem_frame;
unsigned int evtchn;
- int irq;
/* Comms information. */
blkif_back_ring_t blk_ring;
@@ -86,6 +85,11 @@
spinlock_t blk_ring_lock;
atomic_t refcnt;
struct work_struct work;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ u16 shmem_handle;
+ unsigned long shmem_vaddr;
+ grant_ref_t shmem_ref;
+#endif
} blkif_t;
blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
@@ -104,8 +108,6 @@
blkif_t *blkif;
unsigned long id;
int nr_pages;
- unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int next_free;
} active_req_t;
@@ -173,32 +175,7 @@
/* -------[ Mappings to User VMA ]------------------------------------ */
-#define MAX_PENDING_REQS 64
#define BATCH_PER_DOMAIN 16
-extern struct vm_area_struct *blktap_vma;
-
-/* The following are from blkback.c and should probably be put in a
- * header and included from there.
- * The mmap area described here is where attached data pages eill be mapped.
- */
-
-extern unsigned long mmap_vstart;
-#define MMAP_PAGES_PER_REQUEST \
- (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
-
-/* immediately before the mmap area, we have a bunch of pages reserved
- * for shared memory rings.
- */
-
-#define RING_PAGES 3 /* Ctrl, Front, and Back */
-extern unsigned long rings_vstart;
-
/* -------[ Here be globals ]----------------------------------------- */
extern unsigned long blktap_mode;
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Wed Aug
24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Thu Aug
25 22:53:20 2005
@@ -9,6 +9,7 @@
*/
#include "blktap.h"
+#include <asm-xen/evtchn.h>
static char *blkif_state_name[] = {
[BLKIF_STATE_CLOSED] = "closed",
@@ -16,16 +17,15 @@
[BLKIF_STATE_CONNECTED] = "connected",
};
-static char * blkif_status_name[] = {
+static char *blkif_status_name[] = {
[BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
[BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
[BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
};
-static unsigned blktap_be_irq;
-unsigned int blktap_be_state = BLKIF_STATE_CLOSED;
-unsigned int blktap_be_evtchn;
+unsigned int blktap_be_state = BLKIF_STATE_CLOSED;
+unsigned int blktap_be_evtchn;
/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
@@ -49,13 +49,21 @@
blkif_t *blkif = (blkif_t *)arg;
ctrl_msg_t cmsg;
blkif_be_disconnect_t disc;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ struct gnttab_unmap_grant_ref op;
+#endif
/*
* These can't be done in blkif_disconnect() because at that point there
* may be outstanding requests at the disc whose asynchronous responses
* must still be notified to the remote driver.
*/
- unbind_evtchn_from_irq(blkif->evtchn);
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ op.host_addr = blkif->shmem_vaddr;
+ op.handle = blkif->shmem_handle;
+ op.dev_bus_addr = 0;
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+#endif
vfree(blkif->blk_ring.sring);
/* Construct the deferred response message. */
@@ -179,8 +187,12 @@
unsigned int evtchn = connect->evtchn;
unsigned long shmem_frame = connect->shmem_frame;
struct vm_struct *vma;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int ref = connect->shmem_ref;
+#else
pgprot_t prot;
int error;
+#endif
blkif_t *blkif;
blkif_sring_t *sring;
@@ -201,24 +213,46 @@
return;
}
- prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+ prot = __pgprot(_KERNPG_TABLE);
error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
prot, domid);
if ( error != 0 )
{
- WPRINTK("BE_CONNECT: error! (%d)\n", error);
if ( error == -ENOMEM )
connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- else if ( error == -EFAULT ) {
+ else if ( error == -EFAULT )
connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
- WPRINTK("BE_CONNECT: MAPPING error!\n");
- }
else
connect->status = BLKIF_BE_STATUS_ERROR;
vfree(vma->addr);
return;
}
+#else
+ { /* Map: Use the Grant table reference */
+ struct gnttab_map_grant_ref op;
+ op.host_addr = VMALLOC_VMADDR(vma->addr);
+ op.flags = GNTMAP_host_map;
+ op.ref = ref;
+ op.dom = domid;
+
+ BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+
+ handle = op.handle;
+
+ if (op.handle < 0) {
+ DPRINTK(" Grant table operation failure !\n");
+ connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+ vfree(vma->addr);
+ return;
+ }
+
+ blkif->shmem_ref = ref;
+ blkif->shmem_handle = handle;
+ blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr);
+ }
+#endif
if ( blkif->status != DISCONNECTED )
{
@@ -232,12 +266,12 @@
BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
blkif->evtchn = evtchn;
- blkif->irq = bind_evtchn_to_irq(evtchn);
blkif->shmem_frame = shmem_frame;
blkif->status = CONNECTED;
blkif_get(blkif);
- request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
+ bind_evtchn_to_irqhandler(
+ evtchn, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
connect->status = BLKIF_BE_STATUS_OKAY;
}
@@ -264,7 +298,7 @@
blkif->status = DISCONNECTING;
blkif->disconnect_rspid = rsp_id;
wmb(); /* Let other CPUs see the status change. */
- free_irq(blkif->irq, blkif);
+ unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
blkif_deschedule(blkif);
blkif_put(blkif);
return 0; /* Caller should not send response message. */
@@ -286,7 +320,7 @@
};
blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
msg->handle = 0;
- msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
+ msg->shmem_frame = virt_to_mfn(blktap_be_ring.sring);
ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
@@ -313,12 +347,11 @@
int err = 0;
blktap_be_evtchn = status->evtchn;
- blktap_be_irq = bind_evtchn_to_irq(blktap_be_evtchn);
-
- err = request_irq(blktap_be_irq, blkif_ptbe_int,
- SA_SAMPLE_RANDOM, "blkif", NULL);
+
+ err = bind_evtchn_to_irqhandler(
+ blktap_be_evtchn, blkif_ptbe_int, SA_SAMPLE_RANDOM, "blkif", NULL);
if ( err ) {
- WPRINTK("blkfront request_irq failed (%d)\n", err);
+ WPRINTK("blkfront bind_evtchn_to_irqhandler failed (%d)\n", err);
return;
} else {
/* transtion to connected in case we need to do a
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c Thu Aug 25
22:53:20 2005
@@ -280,8 +280,6 @@
int more_to_do = 0;
int notify_be = 0, notify_user = 0;
- if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
-
/* lock both rings */
spin_lock_irqsave(&blkif_io_lock, flags);
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Thu Aug 25
22:53:20 2005
@@ -5,7 +5,6 @@
* Control interface between the driver and a character device.
*
* Copyright (c) 2004, Andrew Warfield
- *
*/
#include <linux/config.h>
@@ -19,7 +18,11 @@
#include <linux/gfp.h>
#include <linux/poll.h>
#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xen-public/grant_table.h>
+#endif
#include "blktap.h"
@@ -32,11 +35,6 @@
/* for poll: */
static wait_queue_head_t blktap_wait;
-
-/* Where things are inside the device mapping. */
-struct vm_area_struct *blktap_vma = NULL;
-unsigned long mmap_vstart;
-unsigned long rings_vstart;
/* Rings up to user space. */
static blkif_front_ring_t blktap_ufe_ring;
@@ -47,6 +45,61 @@
static int blktap_read_fe_ring(void);
static int blktap_read_be_ring(void);
+
+/* -------[ mmap region ]--------------------------------------------- */
+/*
+ * We use a big chunk of address space to map in-flight requests into,
+ * and export this region up to user-space. See the comments in blkback
+ * about this -- the two must be kept in sync if the tap is used as a
+ * passthrough.
+ */
+
+#define MAX_PENDING_REQS 64
+
+/* immediately before the mmap area, we have a bunch of pages reserved
+ * for shared memory rings.
+ */
+#define RING_PAGES 3 /* Ctrl, Front, and Back */
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma = NULL;
+unsigned long mmap_vstart; /* Kernel pages for mapping in data. */
+unsigned long rings_vstart; /* start of mmaped vma */
+unsigned long user_vstart; /* start of user mappings */
+
+#define MMAP_PAGES_PER_REQUEST \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+/*
+ * Address of page '_seg' of request '_req' inside a mapping region that
+ * begins at '_start'.  Every argument is parenthesised so that callers
+ * may pass arbitrary expressions.
+ */
+#define MMAP_VADDR(_start, _req,_seg) \
+ ((_start) + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
+
+/* -------[ grant handles ]------------------------------------------- */
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+/* When using grant tables to map a frame for device access then the
+ * handle returned must be used to unmap the frame. This is needed to
+ * drop the ref count on the frame.
+ */
+struct grant_handle_pair
+{
+ u16 kernel; /* handle of the kernel mapping */
+ u16 user; /* handle of the user-space mapping */
+};
+static struct grant_handle_pair pending_grant_handles[MMAP_PAGES];
+/* Handle pair for segment _i of pending request _idx. */
+#define pending_handle(_idx, _i) \
+ (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
+/* True when both handles of *_g carry the 0xFFFF "invalid" marker. */
+#define BLKTAP_INVALID_HANDLE(_g) \
+ ((((_g)->kernel) == 0xFFFF) && (((_g)->user) == 0xFFFF))
+/* Mark both handles of *_g as invalid. */
+#define BLKTAP_INVALIDATE_HANDLE(_g) do { \
+ (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
+ } while(0)
+
+#endif
+
+
/* -------[ blktap vm ops ]------------------------------------------- */
static struct page *blktap_nopage(struct vm_area_struct *vma,
@@ -76,8 +129,6 @@
if ( test_and_set_bit(0, &blktap_dev_inuse) )
return -EBUSY;
-
- printk(KERN_ALERT "blktap open.\n");
/* Allocate the ctrl ring. */
csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
@@ -128,7 +179,7 @@
blktap_dev_inuse = 0;
blktap_ring_ok = 0;
- printk(KERN_ALERT "blktap closed.\n");
+ DPRINTK(KERN_ALERT "blktap closed.\n");
/* Free the ring page. */
ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
@@ -140,7 +191,7 @@
ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
free_page((unsigned long) blktap_ube_ring.sring);
- /* Clear any active mappings. */
+ /* Clear any active mappings and free foreign map table */
if (blktap_vma != NULL) {
zap_page_range(blktap_vma, blktap_vma->vm_start,
blktap_vma->vm_end - blktap_vma->vm_start, NULL);
@@ -151,21 +202,36 @@
}
/* Note on mmap:
- * remap_pfn_range sets VM_IO on vma->vm_flags. In trying to make libaio
- * work to do direct page access from userspace, this ended up being a
- * problem. The bigger issue seems to be that there is no way to map
- * a foreign page in to user space and have the virtual address of that
- * page map sanely down to a mfn.
- * Removing the VM_IO flag results in a loop in get_user_pages, as
- * pfn_valid() always fails on a foreign page.
+ * We need to map pages to user space in a way that will allow the block
+ * subsystem to set up direct IO to them. This couldn't be done before, because
+ * there isn't really a sane way to translate a user virtual address down to a
+ * physical address when the page belongs to another domain.
+ *
+ * My first approach was to map the page in to kernel memory, add an entry
+ * for it in the physical frame list (using alloc_lomem_region as in blkback)
+ * and then attempt to map that page up to user space. This is disallowed
+ * by xen though, which realizes that we don't really own the machine frame
+ * underlying the physical page.
+ *
+ * The new approach is to provide explicit support for this in xen linux.
+ * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
+ * mapped from other vms. vma->vm_private_data is set up as a mapping
+ * from pages to actual page structs. There is a new clause in get_user_pages
+ * that does the right thing for this sort of mapping.
+ *
+ * blktap_mmap sets up this mapping. Most of the real work is done in
+ * blktap_write_fe_ring below.
*/
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
{
int size;
-
- printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+ struct page **map;
+ int i;
+
+ DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n",
vma->vm_start, vma->vm_end);
+ vma->vm_flags |= VM_RESERVED;
vma->vm_ops = &blktap_vm_ops;
size = vma->vm_end - vma->vm_start;
@@ -177,10 +243,10 @@
}
size >>= PAGE_SHIFT;
- printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
+ DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
rings_vstart = vma->vm_start;
- mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+ user_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT);
/* Map the ring pages to the start of the region and reserve it. */
@@ -190,29 +256,44 @@
DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
if (remap_pfn_range(vma, vma->vm_start,
__pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot)) {
- WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
- }
+ PAGE_SIZE, vma->vm_page_prot))
+ goto fail;
DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE,
__pa(blktap_ube_ring.sring) >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot)) {
- WPRINTK("be_ring: remap_pfn_range failure!\n");
- }
+ PAGE_SIZE, vma->vm_page_prot))
+ goto fail;
DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ),
__pa(blktap_ufe_ring.sring) >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot)) {
- WPRINTK("fe_ring: remap_pfn_range failure!\n");
- }
-
+ PAGE_SIZE, vma->vm_page_prot))
+ goto fail;
+
+ /* Mark this VM as containing foreign pages, and set up mappings. */
+ /* One struct page pointer per page of the VMA; sized from the element
+ * type of 'map' rather than a hand-written (and misspelt) type name. */
+ map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
+ * sizeof(*map),
+ GFP_KERNEL);
+ if (map == NULL) goto fail;
+
+ /* Start with every slot empty (no foreign page installed). */
+ for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
+ map[i] = NULL;
+
+ vma->vm_private_data = map;
+ vma->vm_flags |= VM_FOREIGN;
+
blktap_vma = vma;
blktap_ring_ok = 1;
return 0;
+ fail:
+ /* Clear any active mappings. */
+ zap_page_range(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start, NULL);
+
+ return -ENOMEM;
}
static int blktap_ioctl(struct inode *inode, struct file *filp,
@@ -263,6 +344,8 @@
RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ||
RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
+ flush_tlb_all();
+
RING_PUSH_REQUESTS(&blktap_uctrl_ring);
RING_PUSH_REQUESTS(&blktap_ufe_ring);
RING_PUSH_RESPONSES(&blktap_ube_ring);
@@ -289,11 +372,71 @@
/*-----[ Data to/from user space ]----------------------------------------*/
+/*
+ * Tear down the mappings of the first nr_pages segments of pending
+ * request slot idx.
+ *
+ * With CONFIG_XEN_BLKDEV_GRANT: for every segment whose handle pair is
+ * still valid, queue an unmap of the kernel mapping (by virtual address)
+ * and of the user mapping (by PTE address), invalidate the pair, and
+ * submit the whole batch with one GNTTABOP_unmap_grant_ref call.
+ *
+ * Without grant tables: clear the kernel PTEs through a multicall whose
+ * last entry carries UVMF_TLB_FLUSH|UVMF_ALL.
+ */
+static void fast_flush_area(int idx, int nr_pages)
+{
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+    unsigned int op = 0;
+    int i;
+    struct grant_handle_pair *handle;
+    unsigned long ptep;
+
+    for (i = 0; i < nr_pages; i++)
+    {
+        handle = &pending_handle(idx, i);
+        if (BLKTAP_INVALID_HANDLE(handle))
+            continue;
+
+        /*
+         * Look the user PTE up before queueing anything for this segment.
+         * On failure stop scanning but still submit what has been queued:
+         * those handles are already invalidated, so dropping the batch
+         * would leak their mappings for good.  This segment's pair is left
+         * untouched.
+         */
+        if (create_lookup_pte_addr(blktap_vma->vm_mm,
+                                   MMAP_VADDR(user_vstart, idx, i),
+                                   &ptep) != 0) {
+            DPRINTK("Couldn't get a pte addr!\n");
+            break;
+        }
+
+        /* Kernel-side mapping. */
+        unmap[op].host_addr    = MMAP_VADDR(mmap_vstart, idx, i);
+        unmap[op].dev_bus_addr = 0;
+        unmap[op].handle       = handle->kernel;
+        op++;
+
+        /* User-side mapping, addressed through its PTE. */
+        unmap[op].host_addr    = ptep;
+        unmap[op].dev_bus_addr = 0;
+        unmap[op].handle       = handle->user;
+        op++;
+
+        BLKTAP_INVALIDATE_HANDLE(handle);
+    }
+    if ( unlikely(HYPERVISOR_grant_table_op(
+            GNTTABOP_unmap_grant_ref, unmap, op)))
+        BUG();
+#else
+    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    int i;
+
+    /* Nothing to flush; also keeps mcl[nr_pages-1] below in bounds. */
+    if ( nr_pages <= 0 )
+        return;
+
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        MULTI_update_va_mapping(mcl+i, MMAP_VADDR(mmap_vstart, idx, i),
+                                __pte(0), 0);
+    }
+
+    mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
+        BUG();
+#endif
+}
+
int blktap_write_fe_ring(blkif_request_t *req)
{
blkif_request_t *target;
- int error, i;
+ int i, ret = 0;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+ int op;
+#else
+ unsigned long remap_prot;
+ multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1];
+ mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#endif
/*
* This is called to pass a request from the real frontend domain's
@@ -310,29 +453,184 @@
return 0;
}
- target = RING_GET_REQUEST(&blktap_ufe_ring,
- blktap_ufe_ring.req_prod_pvt);
+ flush_cache_all(); /* a noop on intel... */
+
+ target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt);
memcpy(target, req, sizeof(*req));
- /* Attempt to map the foreign pages directly in to the application */
+ /* Map the foreign pages directly in to the application */
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ op = 0;
for (i=0; i<target->nr_segments; i++) {
- error = direct_remap_area_pages(blktap_vma->vm_mm,
- MMAP_VADDR(ID_TO_IDX(req->id), i),
- target->frame_and_sects[i] & PAGE_MASK,
- PAGE_SIZE,
- blktap_vma->vm_page_prot,
- ID_TO_DOM(req->id));
- if ( error != 0 ) {
- printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
- /* the request is now dropped on the floor. */
- return 0;
- }
- }
-
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long ptep;
+
+ uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i);
+ kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
+
+ /* Map the remote page to kernel. */
+ map[op].host_addr = kvaddr;
+ map[op].dom = ID_TO_DOM(req->id);
+ map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]);
+ map[op].flags = GNTMAP_host_map;
+ /* This needs a bit more thought in terms of interposition:
+ * If we want to be able to modify pages during write using
+ * grant table mappings, the guest will either need to allow
+ * it, or we'll need to incur a copy. */
+ if (req->operation == BLKIF_OP_WRITE)
+ map[op].flags |= GNTMAP_readonly;
+ op++;
+
+ /* Now map it to user. */
+ ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
+ if (ret)
+ {
+ DPRINTK("Couldn't get a pte addr!\n");
+ goto fail;
+ }
+
+ map[op].host_addr = ptep;
+ map[op].dom = ID_TO_DOM(req->id);
+ map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]);
+ map[op].flags = GNTMAP_host_map | GNTMAP_application_map
+ | GNTMAP_contains_pte;
+ /* Above interposition comment applies here as well. */
+ if (req->operation == BLKIF_OP_WRITE)
+ map[op].flags |= GNTMAP_readonly;
+ op++;
+ }
+
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, map, op)))
+ BUG();
+
+ op = 0;
+ for (i=0; i<(target->nr_segments*2); i+=2) {
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long offset;
+ int cancel = 0;
+
+ uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i/2);
+ kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i/2);
+
+ if ( unlikely(map[i].handle < 0) ) {
+ DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle);
+ ret = map[i].handle;
+ cancel = 1;
+ }
+
+ if ( unlikely(map[i+1].handle < 0) ) {
+ DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle);
+ ret = map[i+1].handle;
+ cancel = 1;
+ }
+
+ if (cancel)
+ goto fail;
+
+ /* Set the necessary mappings in p2m and in the VM_FOREIGN
+ * vm_area_struct to allow user vaddr -> struct page lookups
+ * to work. This is needed for direct IO to foreign pages. */
+ phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
+
+ offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+ ((struct page **)blktap_vma->vm_private_data)[offset] =
+ pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+
+ /* Save handles for unmapping later. */
+ pending_handle(ID_TO_IDX(req->id), i/2).kernel = map[i].handle;
+ pending_handle(ID_TO_IDX(req->id), i/2).user = map[i+1].handle;
+ }
+
+#else
+
+ remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
+
+ for (i=0; i<target->nr_segments; i++) {
+ unsigned long buf;
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long offset;
+ unsigned long ptep;
+
+ buf = target->frame_and_sects[i] & PAGE_MASK;
+ uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i);
+ kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
+
+ MULTI_update_va_mapping_otherdomain(
+ mcl+i,
+ kvaddr,
+ pfn_pte_ma(buf >> PAGE_SHIFT, __pgprot(remap_prot)),
+ 0,
+ ID_TO_DOM(req->id));
+
+ phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] =
+ FOREIGN_FRAME(buf >> PAGE_SHIFT);
+
+ ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
+ if (ret)
+ {
+ DPRINTK("error getting pte\n");
+ goto fail;
+ }
+
+ mmu[i].ptr = ptep;
+ mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK)
+ | pgprot_val(blktap_vma->vm_page_prot);
+
+ offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+ ((struct page **)blktap_vma->vm_private_data)[offset] =
+ pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+ }
+
+ /* Add the mmu_update call. */
+ mcl[i].op = __HYPERVISOR_mmu_update;
+ mcl[i].args[0] = (unsigned long)mmu;
+ mcl[i].args[1] = target->nr_segments;
+ mcl[i].args[2] = 0;
+ mcl[i].args[3] = ID_TO_DOM(req->id);
+
+ BUG_ON(HYPERVISOR_multicall(mcl, target->nr_segments+1) != 0);
+
+ /* Make sure it all worked. */
+ for ( i = 0; i < target->nr_segments; i++ )
+ {
+ if ( unlikely(mcl[i].result != 0) )
+ {
+ DPRINTK("invalid buffer -- could not remap it\n");
+ ret = mcl[i].result;
+ goto fail;
+ }
+ }
+ if ( unlikely(mcl[i].result != 0) )
+ {
+ DPRINTK("direct remapping of pages to /dev/blktap failed.\n");
+ ret = mcl[i].result;
+ goto fail;
+ }
+#endif /* CONFIG_XEN_BLKDEV_GRANT */
+
+ /* Mark mapped pages as reserved: */
+ for ( i = 0; i < target->nr_segments; i++ )
+ {
+ unsigned long kvaddr;
+
+ kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
+ SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
+ }
+
+
blktap_ufe_ring.req_prod_pvt++;
return 0;
+
+ fail:
+ fast_flush_area(ID_TO_IDX(req->id), target->nr_segments);
+ return ret;
}
int blktap_write_be_ring(blkif_response_t *rsp)
@@ -366,7 +664,7 @@
{
/* This is called to read responses from the UFE ring. */
- RING_IDX i, rp;
+ RING_IDX i, j, rp;
blkif_response_t *resp_s;
blkif_t *blkif;
active_req_t *ar;
@@ -387,7 +685,21 @@
DPRINTK("resp->fe_ring\n");
ar = lookup_active_req(ID_TO_IDX(resp_s->id));
blkif = ar->blkif;
- zap_page_range(blktap_vma, MMAP_VADDR(ID_TO_IDX(resp_s->id), 0),
+ for (j = 0; j < ar->nr_pages; j++) {
+ unsigned long uvaddr, kvaddr;
+ struct page **map = blktap_vma->vm_private_data;
+ int offset;
+
+ uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), j);
+ kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(resp_s->id), j);
+ offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+
+ /* The reserved bit was set on the page behind the kernel
+ * (mmap_vstart) address when the request was mapped, so it has
+ * to be cleared on that same page. The user address only
+ * indexes the VM_FOREIGN lookup table and must not be fed to
+ * virt_to_page(). */
+ ClearPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
+ map[offset] = NULL;
+ }
+
+ fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
+ zap_page_range(blktap_vma,
+ MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0),
ar->nr_pages << PAGE_SHIFT, NULL);
write_resp_to_fe_ring(blkif, resp_s);
blktap_ufe_ring.rsp_cons = i + 1;
@@ -462,7 +774,18 @@
int blktap_init(void)
{
- int err;
+ int err, i, j;
+ struct page *page;
+
+ page = balloon_alloc_empty_page_range(MMAP_PAGES);
+ BUG_ON(page == NULL);
+ mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ for (i=0; i<MAX_PENDING_REQS ; i++)
+ for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
+ BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
+#endif
err = misc_register(&blktap_miscdev);
if ( err != 0 )
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Thu Aug 25
22:53:20 2005
@@ -240,7 +240,11 @@
#endif
/*** Useful function for console debugging -- goes straight to Xen. ***/
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
asmlinkage int xprintk(const char *fmt, ...)
+#else
+asmlinkage int xprintk(const char *fmt, ...)
+#endif
{
va_list args;
int printk_len;
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/netback/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/netback/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/Makefile Thu Aug 25 22:53:20 2005
@@ -1,2 +1,2 @@
-obj-y := netback.o control.o interface.o loopback.o
+obj-y := netback.o xenbus.o interface.o loopback.o
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Aug 25 22:53:20 2005
@@ -15,9 +15,17 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
#include <asm-xen/xen-public/io/netif.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#endif
+
+
#if 0
#define ASSERT(_p) \
@@ -39,9 +47,19 @@
/* Physical parameters of the comms window. */
unsigned long tx_shmem_frame;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ u16 tx_shmem_handle;
+ unsigned long tx_shmem_vaddr;
+ grant_ref_t tx_shmem_ref;
+#endif
unsigned long rx_shmem_frame;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ u16 rx_shmem_handle;
+ unsigned long rx_shmem_vaddr;
+ grant_ref_t rx_shmem_ref;
+#endif
unsigned int evtchn;
- int irq;
+ unsigned int remote_evtchn;
/* The shared rings and indexes. */
netif_tx_interface_t *tx;
@@ -65,36 +83,30 @@
/* Miscellaneous private stuff. */
enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
int active;
- /*
- * DISCONNECT response is deferred until pending requests are ack'ed.
- * We therefore need to store the id from the original request.
- */
- u8 disconnect_rspid;
- struct netif_st *hash_next;
struct list_head list; /* scheduling list */
atomic_t refcnt;
struct net_device *dev;
struct net_device_stats stats;
- struct work_struct work;
+ struct work_struct free_work;
} netif_t;
-void netif_create(netif_be_create_t *create);
-void netif_destroy(netif_be_destroy_t *destroy);
-void netif_creditlimit(netif_be_creditlimit_t *creditlimit);
-void netif_connect(netif_be_connect_t *connect);
-int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id);
-void netif_disconnect_complete(netif_t *netif);
-netif_t *netif_find_by_handle(domid_t domid, unsigned int handle);
+void netif_creditlimit(netif_t *netif);
+int netif_disconnect(netif_t *netif);
+
+netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
+void free_netif_callback(netif_t *netif);
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
+
#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
#define netif_put(_b) \
do { \
if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- netif_disconnect_complete(_b); \
+ free_netif_callback(_b); \
} while (0)
-void netif_interface_init(void);
-void netif_ctrlif_init(void);
+void netif_xenbus_init(void);
void netif_schedule_work(netif_t *netif);
void netif_deschedule_work(netif_t *netif);
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Thu Aug 25
22:53:20 2005
@@ -9,31 +9,14 @@
#include "common.h"
#include <linux/rtnetlink.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
-
-#define NETIF_HASHSZ 1024
-#define NETIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(NETIF_HASHSZ-1))
-
-static netif_t *netif_hash[NETIF_HASHSZ];
-
-netif_t *netif_find_by_handle(domid_t domid, unsigned int handle)
-{
- netif_t *netif = netif_hash[NETIF_HASH(domid, handle)];
- while ( (netif != NULL) &&
- ((netif->domid != domid) || (netif->handle != handle)) )
- netif = netif->hash_next;
- return netif;
-}
-
static void __netif_up(netif_t *netif)
{
struct net_device *dev = netif->dev;
spin_lock_bh(&dev->xmit_lock);
netif->active = 1;
spin_unlock_bh(&dev->xmit_lock);
- (void)request_irq(netif->irq, netif_be_int, 0, dev->name, netif);
+ (void)bind_evtchn_to_irqhandler(
+ netif->evtchn, netif_be_int, 0, dev->name, netif);
netif_schedule_work(netif);
}
@@ -43,14 +26,14 @@
spin_lock_bh(&dev->xmit_lock);
netif->active = 0;
spin_unlock_bh(&dev->xmit_lock);
- free_irq(netif->irq, netif);
+ unbind_evtchn_from_irqhandler(netif->evtchn, netif);
netif_deschedule_work(netif);
}
static int net_open(struct net_device *dev)
{
netif_t *netif = netdev_priv(dev);
- if ( netif->status == CONNECTED )
+ if (netif->status == CONNECTED)
__netif_up(netif);
netif_start_queue(dev);
return 0;
@@ -60,74 +43,23 @@
{
netif_t *netif = netdev_priv(dev);
netif_stop_queue(dev);
- if ( netif->status == CONNECTED )
+ if (netif->status == CONNECTED)
__netif_down(netif);
return 0;
}
-static void __netif_disconnect_complete(void *arg)
-{
- netif_t *netif = (netif_t *)arg;
- ctrl_msg_t cmsg;
- netif_be_disconnect_t disc;
-
- /*
- * These can't be done in netif_disconnect() because at that point there
- * may be outstanding requests in the network stack whose asynchronous
- * responses must still be notified to the remote driver.
- */
- unbind_evtchn_from_irq(netif->evtchn);
- vfree(netif->tx); /* Frees netif->rx as well. */
-
- /* Construct the deferred response message. */
- cmsg.type = CMSG_NETIF_BE;
- cmsg.subtype = CMSG_NETIF_BE_DISCONNECT;
- cmsg.id = netif->disconnect_rspid;
- cmsg.length = sizeof(netif_be_disconnect_t);
- disc.domid = netif->domid;
- disc.netif_handle = netif->handle;
- disc.status = NETIF_BE_STATUS_OKAY;
- memcpy(cmsg.msg, &disc, sizeof(disc));
-
- /*
- * Make sure message is constructed /before/ status change, because
- * after the status change the 'netif' structure could be deallocated at
- * any time. Also make sure we send the response /after/ status change,
- * as otherwise a subsequent CONNECT request could spuriously fail if
- * another CPU doesn't see the status change yet.
- */
- mb();
- if ( netif->status != DISCONNECTING )
- BUG();
- netif->status = DISCONNECTED;
- mb();
-
- /* Send the successful response. */
- ctrl_if_send_response(&cmsg);
-}
-
-void netif_disconnect_complete(netif_t *netif)
-{
- INIT_WORK(&netif->work, __netif_disconnect_complete, (void *)netif);
- schedule_work(&netif->work);
-}
-
-void netif_create(netif_be_create_t *create)
-{
- int err = 0;
- domid_t domid = create->domid;
- unsigned int handle = create->netif_handle;
+netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
+{
+ int err = 0, i;
struct net_device *dev;
- netif_t **pnetif, *netif;
- char name[IFNAMSIZ] = {};
+ netif_t *netif;
+ char name[IFNAMSIZ] = {};
snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
- if ( dev == NULL )
- {
+ if (dev == NULL) {
DPRINTK("Could not create netif: out of memory\n");
- create->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
- return;
+ return NULL;
}
netif = netdev_priv(dev);
@@ -142,19 +74,6 @@
netif->credit_usec = 0UL;
init_timer(&netif->credit_timeout);
- pnetif = &netif_hash[NETIF_HASH(domid, handle)];
- while ( *pnetif != NULL )
- {
- if ( ((*pnetif)->domid == domid) && ((*pnetif)->handle == handle) )
- {
- DPRINTK("Could not create netif: already exists\n");
- create->status = NETIF_BE_STATUS_INTERFACE_EXISTS;
- free_netdev(dev);
- return;
- }
- pnetif = &(*pnetif)->hash_next;
- }
-
dev->hard_start_xmit = netif_be_start_xmit;
dev->get_stats = netif_be_get_stats;
dev->open = net_open;
@@ -164,10 +83,10 @@
/* Disable queuing. */
dev->tx_queue_len = 0;
- if ( (create->be_mac[0] == 0) && (create->be_mac[1] == 0) &&
- (create->be_mac[2] == 0) && (create->be_mac[3] == 0) &&
- (create->be_mac[4] == 0) && (create->be_mac[5] == 0) )
- {
+ for (i = 0; i < ETH_ALEN; i++)
+ if (be_mac[i] != 0)
+ break;
+ if (i == ETH_ALEN) {
/*
* Initialise a dummy MAC address. We choose the numerically largest
* non-broadcast address to prevent the address getting stolen by an
@@ -175,87 +94,200 @@
*/
memset(dev->dev_addr, 0xFF, ETH_ALEN);
dev->dev_addr[0] &= ~0x01;
- }
- else
- {
- memcpy(dev->dev_addr, create->be_mac, ETH_ALEN);
- }
-
- memcpy(netif->fe_dev_addr, create->mac, ETH_ALEN);
+ } else
+ memcpy(dev->dev_addr, be_mac, ETH_ALEN);
rtnl_lock();
err = register_netdevice(dev);
rtnl_unlock();
-
- if ( err != 0 )
- {
+ if (err) {
DPRINTK("Could not register new net device %s: err=%d\n",
dev->name, err);
- create->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
free_netdev(dev);
- return;
- }
-
- netif->hash_next = *pnetif;
- *pnetif = netif;
+ return NULL;
+ }
DPRINTK("Successfully created netif\n");
- create->status = NETIF_BE_STATUS_OKAY;
-}
-
-void netif_destroy(netif_be_destroy_t *destroy)
-{
- domid_t domid = destroy->domid;
- unsigned int handle = destroy->netif_handle;
- netif_t **pnetif, *netif;
-
- pnetif = &netif_hash[NETIF_HASH(domid, handle)];
- while ( (netif = *pnetif) != NULL )
+ return netif;
+}
+
+/*
+ * Map the frontend's two shared ring pages into the kernel: the TX ring at
+ * localaddr and the RX ring at localaddr + PAGE_SIZE.
+ *
+ * For each direction, the grant-table path is used when the matching
+ * CONFIG_XEN_NETDEV_GRANT_{TX,RX} option is set (the handle, reference and
+ * address are then recorded in *netif for the later unmap); otherwise the
+ * page is mapped with direct_remap_area_pages().
+ *
+ * Returns 0 on success, or the failing operation's error value (the
+ * negative grant handle, or the direct_remap_area_pages() result).
+ */
+static int map_frontend_page(netif_t *netif, unsigned long localaddr,
+ unsigned long tx_ring_ref, unsigned long
rx_ring_ref)
+{
+#if !defined(CONFIG_XEN_NETDEV_GRANT_TX)||!defined(CONFIG_XEN_NETDEV_GRANT_RX)
+ pgprot_t prot = __pgprot(_KERNPG_TABLE);
+ int err;
+#endif
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX)
{
- if ( (netif->domid == domid) && (netif->handle == handle) )
- {
- if ( netif->status != DISCONNECTED )
- goto still_connected;
- goto destroy;
+ struct gnttab_map_grant_ref op;
+
+ /* Map: Use the Grant table reference */
+ op.host_addr = localaddr;
+ op.flags = GNTMAP_host_map;
+ op.ref = tx_ring_ref;
+ op.dom = netif->domid;
+
+ BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+ if (op.handle < 0) {
+ DPRINTK(" Grant table operation failure !\n");
+ return op.handle;
}
- pnetif = &netif->hash_next;
- }
-
- destroy->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
-
- still_connected:
- destroy->status = NETIF_BE_STATUS_INTERFACE_CONNECTED;
- return;
-
- destroy:
- *pnetif = netif->hash_next;
+
+ netif->tx_shmem_ref = tx_ring_ref;
+ netif->tx_shmem_handle = op.handle;
+ netif->tx_shmem_vaddr = localaddr;
+ }
+#else
+ err = direct_remap_area_pages(&init_mm, localaddr,
+ tx_ring_ref<<PAGE_SHIFT, PAGE_SIZE,
+ prot, netif->domid);
+ if (err)
+ return err;
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_RX)
+ {
+ struct gnttab_map_grant_ref op;
+
+ /* Map: Use the Grant table reference */
+ op.host_addr = localaddr + PAGE_SIZE;
+ op.flags = GNTMAP_host_map;
+ op.ref = rx_ring_ref;
+ op.dom = netif->domid;
+
+ BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+ /* NOTE(review): if the TX page was grant-mapped above, returning
+ * here leaves that mapping in place; nothing in this function
+ * undoes it. Confirm whether the caller is expected to clean up. */
+ if (op.handle < 0) {
+ DPRINTK(" Grant table operation failure !\n");
+ return op.handle;
+ }
+
+ netif->rx_shmem_ref = rx_ring_ref;
+ netif->rx_shmem_handle = op.handle;
+ netif->rx_shmem_vaddr = localaddr + PAGE_SIZE;
+ }
+#else
+ err = direct_remap_area_pages(&init_mm, localaddr + PAGE_SIZE,
+ rx_ring_ref<<PAGE_SHIFT, PAGE_SIZE,
+ prot, netif->domid);
+ if (err)
+ return err;
+#endif
+
+ return 0;
+}
+
+/*
+ * Release the grant mappings of the shared ring pages recorded in *netif.
+ * Only directions built with grant-table support are unmapped; a failing
+ * unmap hypercall is fatal (BUG_ON).
+ */
+static void unmap_frontend_page(netif_t *netif)
+{
+#if defined(CONFIG_XEN_NETDEV_GRANT_RX) || defined(CONFIG_XEN_NETDEV_GRANT_TX)
+    struct gnttab_unmap_grant_ref unmap;
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    /* TX ring page. */
+    unmap.dev_bus_addr = 0;
+    unmap.handle       = netif->tx_shmem_handle;
+    unmap.host_addr    = netif->tx_shmem_vaddr;
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1));
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    /* RX ring page. */
+    unmap.dev_bus_addr = 0;
+    unmap.handle       = netif->rx_shmem_handle;
+    unmap.host_addr    = netif->rx_shmem_vaddr;
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1));
+#endif
+}
+
+/*
+ * Connect a backend interface to its frontend: reserve two pages of kernel
+ * virtual address space, map the TX/RX ring pages into them, bind an
+ * interdomain event channel to the frontend's port, and mark the interface
+ * CONNECTED (bringing it up if the net device is already running).
+ *
+ * Returns 0 on success, -ENOMEM if no address space could be reserved, or
+ * the error reported by the ring mapping / event-channel bind.
+ */
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+              unsigned long rx_ring_ref, unsigned int evtchn)
+{
+    evtchn_op_t bind = { .cmd = EVTCHNOP_bind_interdomain };
+    struct vm_struct *area;
+    int rc;
+
+    area = get_vm_area(2*PAGE_SIZE, VM_IOREMAP);
+    if (area == NULL)
+        return -ENOMEM;
+
+    rc = map_frontend_page(netif, (unsigned long)area->addr,
+                           tx_ring_ref, rx_ring_ref);
+    if (rc)
+        goto release_area;
+
+    bind.u.bind_interdomain.dom1  = DOMID_SELF;
+    bind.u.bind_interdomain.dom2  = netif->domid;
+    bind.u.bind_interdomain.port1 = 0;
+    bind.u.bind_interdomain.port2 = evtchn;
+    rc = HYPERVISOR_event_channel_op(&bind);
+    if (rc)
+        goto unmap_rings;
+
+    netif->evtchn        = bind.u.bind_interdomain.port1;
+    netif->remote_evtchn = evtchn;
+
+    netif->tx = (netif_tx_interface_t *)area->addr;
+    netif->rx = (netif_rx_interface_t *)((char *)area->addr + PAGE_SIZE);
+    netif->rx->resp_prod = 0;
+    netif->tx->resp_prod = 0;
+    netif_get(netif);
+    wmb(); /* Other CPUs see new state before interface is started. */
+
+    rtnl_lock();
+    netif->status = CONNECTED;
+    wmb();
+    if (netif_running(netif->dev))
+        __netif_up(netif);
+    rtnl_unlock();
+
+    return 0;
+
+ unmap_rings:
+    unmap_frontend_page(netif);
+ release_area:
+    vfree(area->addr);
+    return rc;
+}
+
+/*
+ * Work-queue handler that performs the final teardown of an interface;
+ * arg is the netif_t to destroy. It closes the local and remote event
+ * channel ports, unregisters the net device, unmaps and frees the shared
+ * ring area (if one was mapped) and finally frees the net device.
+ */
+static void free_netif(void *arg)
+{
+ evtchn_op_t op = { .cmd = EVTCHNOP_close };
+ netif_t *netif = (netif_t *)arg;
+
+ /*
+ * These can't be done in netif_disconnect() because at that point there
+ * may be outstanding requests in the network stack whose asynchronous
+ * responses must still be notified to the remote driver.
+ */
+
+ /* Close our end, then the frontend's end. NOTE(review): the result of
+ * both event-channel hypercalls is ignored -- confirm that is intended. */
+ op.u.close.port = netif->evtchn;
+ op.u.close.dom = DOMID_SELF;
+ HYPERVISOR_event_channel_op(&op);
+ op.u.close.port = netif->remote_evtchn;
+ op.u.close.dom = netif->domid;
+ HYPERVISOR_event_channel_op(&op);
+
unregister_netdev(netif->dev);
+
+ /* netif->tx is non-NULL only once the rings have been mapped. */
+ if (netif->tx) {
+ unmap_frontend_page(netif);
+ vfree(netif->tx); /* Frees netif->rx as well. */
+ }
+
free_netdev(netif->dev);
- destroy->status = NETIF_BE_STATUS_OKAY;
-}
-
-void netif_creditlimit(netif_be_creditlimit_t *creditlimit)
-{
- domid_t domid = creditlimit->domid;
- unsigned int handle = creditlimit->netif_handle;
- netif_t *netif;
-
- netif = netif_find_by_handle(domid, handle);
- if ( unlikely(netif == NULL) )
- {
- DPRINTK("netif_creditlimit attempted for non-existent netif"
- " (%u,%u)\n", creditlimit->domid, creditlimit->netif_handle);
- creditlimit->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
+}
+
+/*
+ * Defer destruction of netif to a work queue: the embedded free_work item
+ * is set up to run free_netif() on this interface and is then scheduled.
+ */
+void free_netif_callback(netif_t *netif)
+{
+    struct work_struct *work = &netif->free_work;
+
+    INIT_WORK(work, free_netif, (void *)netif);
+    schedule_work(work);
+}
+
+void netif_creditlimit(netif_t *netif)
+{
+#if 0
/* Set the credit limit (reset remaining credit to new limit). */
netif->credit_bytes = netif->remaining_credit = creditlimit->credit_bytes;
netif->credit_usec = creditlimit->period_usec;
- if ( netif->status == CONNECTED )
- {
+ if (netif->status == CONNECTED) {
/*
* Schedule work so that any packets waiting under previous credit
* limit are dealt with (acts like a replenishment point).
@@ -263,119 +295,22 @@
netif->credit_timeout.expires = jiffies;
netif_schedule_work(netif);
}
-
- creditlimit->status = NETIF_BE_STATUS_OKAY;
-}
-
-void netif_connect(netif_be_connect_t *connect)
-{
- domid_t domid = connect->domid;
- unsigned int handle = connect->netif_handle;
- unsigned int evtchn = connect->evtchn;
- unsigned long tx_shmem_frame = connect->tx_shmem_frame;
- unsigned long rx_shmem_frame = connect->rx_shmem_frame;
- struct vm_struct *vma;
- pgprot_t prot;
- int error;
- netif_t *netif;
-
- netif = netif_find_by_handle(domid, handle);
- if ( unlikely(netif == NULL) )
- {
- DPRINTK("netif_connect attempted for non-existent netif (%u,%u)\n",
- connect->domid, connect->netif_handle);
- connect->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- if ( netif->status != DISCONNECTED )
- {
- connect->status = NETIF_BE_STATUS_INTERFACE_CONNECTED;
- return;
- }
-
- if ( (vma = get_vm_area(2*PAGE_SIZE, VM_IOREMAP)) == NULL )
- {
- connect->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- prot = __pgprot(_KERNPG_TABLE);
- error = direct_remap_area_pages(&init_mm,
- VMALLOC_VMADDR(vma->addr),
- tx_shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
- prot, domid);
- error |= direct_remap_area_pages(&init_mm,
- VMALLOC_VMADDR(vma->addr) + PAGE_SIZE,
- rx_shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
- prot, domid);
- if ( error != 0 )
- {
- if ( error == -ENOMEM )
- connect->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
- else if ( error == -EFAULT )
- connect->status = NETIF_BE_STATUS_MAPPING_ERROR;
- else
- connect->status = NETIF_BE_STATUS_ERROR;
- vfree(vma->addr);
- return;
- }
-
- netif->evtchn = evtchn;
- netif->irq = bind_evtchn_to_irq(evtchn);
- netif->tx_shmem_frame = tx_shmem_frame;
- netif->rx_shmem_frame = rx_shmem_frame;
- netif->tx =
- (netif_tx_interface_t *)vma->addr;
- netif->rx =
- (netif_rx_interface_t *)((char *)vma->addr + PAGE_SIZE);
- netif->tx->resp_prod = netif->rx->resp_prod = 0;
- netif_get(netif);
- wmb(); /* Other CPUs see new state before interface is started. */
-
- rtnl_lock();
- netif->status = CONNECTED;
- wmb();
- if ( netif_running(netif->dev) )
- __netif_up(netif);
- rtnl_unlock();
-
- connect->status = NETIF_BE_STATUS_OKAY;
-}
-
-int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id)
-{
- domid_t domid = disconnect->domid;
- unsigned int handle = disconnect->netif_handle;
- netif_t *netif;
-
- netif = netif_find_by_handle(domid, handle);
- if ( unlikely(netif == NULL) )
- {
- DPRINTK("netif_disconnect attempted for non-existent netif"
- " (%u,%u)\n", disconnect->domid, disconnect->netif_handle);
- disconnect->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return 1; /* Caller will send response error message. */
- }
-
- if ( netif->status == CONNECTED )
- {
+#endif
+}
+
+/*
+ * Begin disconnecting an interface.
+ *
+ * If the interface is CONNECTED it is moved to DISCONNECTING under the
+ * rtnl lock, taken down if its net device is running, and one reference
+ * is dropped with netif_put(); 0 is returned. In any other state nothing
+ * is done and 1 is returned.
+ */
+int netif_disconnect(netif_t *netif)
+{
+
+ if (netif->status == CONNECTED) {
rtnl_lock();
netif->status = DISCONNECTING;
- netif->disconnect_rspid = rsp_id;
wmb();
- if ( netif_running(netif->dev) )
+ if (netif_running(netif->dev))
__netif_down(netif);
rtnl_unlock();
netif_put(netif);
return 0; /* Caller should not send response message. */
}
- disconnect->status = NETIF_BE_STATUS_OKAY;
return 1;
}
-
-void netif_interface_init(void)
-{
- memset(netif_hash, 0, sizeof(netif_hash));
-}
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu Aug 25
22:53:20 2005
@@ -12,11 +12,6 @@
#include "common.h"
#include <asm-xen/balloon.h>
-#include <asm-xen/evtchn.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#include <linux/delay.h>
-#endif
#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
#include <asm-xen/xen-public/grant_table.h>
@@ -44,7 +39,7 @@
static int make_rx_response(netif_t *netif,
u16 id,
s8 st,
- memory_t addr,
+ unsigned long addr,
u16 size,
u16 csum_valid);
@@ -55,11 +50,15 @@
static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
static struct timer_list net_timer;
+
+#define MAX_PENDING_REQS 256
static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
-#ifndef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
+#else
static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
#endif
static unsigned char rx_notify[NR_EVENT_CHANNELS];
@@ -67,7 +66,6 @@
/* Don't currently gate addition of an interface to the tx scheduling list. */
#define tx_work_exists(_if) (1)
-#define MAX_PENDING_REQS 256
static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
@@ -91,11 +89,9 @@
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
static u16 grant_tx_ref[MAX_PENDING_REQS];
-#endif
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
-#endif
-#ifndef CONFIG_XEN_NETDEV_GRANT_TX
+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+#else
static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
#endif
@@ -153,11 +149,7 @@
static inline int is_xen_skb(struct sk_buff *skb)
{
extern kmem_cache_t *skbuff_cachep;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
-#else
- kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->list.next;
-#endif
return (cp == skbuff_cachep);
}
@@ -251,7 +243,7 @@
#else
struct mmuext_op *mmuext;
#endif
- unsigned long vdata, mdata, new_mfn;
+ unsigned long vdata, old_mfn, new_mfn;
struct sk_buff_head rxq;
struct sk_buff *skb;
u16 notify_list[NETIF_RX_RING_SIZE];
@@ -271,7 +263,7 @@
{
netif = netdev_priv(skb->dev);
vdata = (unsigned long)skb->data;
- mdata = virt_to_machine(vdata);
+ old_mfn = virt_to_mfn(vdata);
/* Memory squeeze? Back off for an arbitrary while. */
if ( (new_mfn = alloc_mfn()) == 0 )
@@ -293,7 +285,7 @@
mcl++;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- gop->mfn = mdata >> PAGE_SHIFT;
+ gop->mfn = old_mfn;
gop->domid = netif->domid;
gop->handle = netif->rx->ring[
MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
@@ -308,7 +300,7 @@
mcl++;
mmuext->cmd = MMUEXT_REASSIGN_PAGE;
- mmuext->mfn = mdata >> PAGE_SHIFT;
+ mmuext->mfn = old_mfn;
mmuext++;
#endif
mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
@@ -318,7 +310,7 @@
__skb_queue_tail(&rxq, skb);
#ifdef DEBUG_GRANT
- dump_packet('a', mdata, vdata);
+ dump_packet('a', old_mfn, vdata);
#endif
/* Filled the batch queue? */
if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
@@ -345,10 +337,8 @@
mcl = rx_mcl;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
- grant_rx_op, gop - grant_rx_op))) {
- BUG();
- }
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_donate, grant_rx_op, gop - grant_rx_op));
gop = grant_rx_op;
#else
mmuext = rx_mmuext;
@@ -361,10 +351,9 @@
/* Rederive the machine addresses. */
new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- mdata = (unsigned long)skb->data & ~PAGE_MASK;
-#else
- mdata = ((mmuext[0].mfn << PAGE_SHIFT) |
- ((unsigned long)skb->data & ~PAGE_MASK));
+ old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
+#else
+ old_mfn = mmuext[0].mfn;
#endif
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
@@ -379,18 +368,20 @@
/* Check the reassignment error code. */
status = NETIF_RSP_OKAY;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- BUG_ON(gop->status != 0);
+ BUG_ON(gop->status != 0); /* XXX */
#else
if ( unlikely(mcl[1].result != 0) )
{
DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
- free_mfn(mdata >> PAGE_SHIFT);
+ free_mfn(old_mfn);
status = NETIF_RSP_ERROR;
}
#endif
evtchn = netif->evtchn;
id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
- if ( make_rx_response(netif, id, status, mdata,
+ if ( make_rx_response(netif, id, status,
+ (old_mfn << PAGE_SHIFT) | /* XXX */
+ ((unsigned long)skb->data & ~PAGE_MASK),
size, skb->proto_csum_valid) &&
(rx_notify[evtchn] == 0) )
{
@@ -493,7 +484,6 @@
inline static void net_tx_action_dealloc(void)
{
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- gnttab_unmap_grant_ref_t unmap_ops[MAX_PENDING_REQS];
gnttab_unmap_grant_ref_t *gop;
#else
multicall_entry_t *mcl;
@@ -509,19 +499,18 @@
/*
* Free up any grants we have finished using
*/
- gop = unmap_ops;
- while (dc != dp) {
+ gop = tx_unmap_ops;
+ while ( dc != dp )
+ {
pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
- gop->host_virt_addr = MMAP_VADDR(pending_idx);
+ gop->host_addr = MMAP_VADDR(pending_idx);
gop->dev_bus_addr = 0;
- gop->handle = grant_tx_ref[pending_idx];
+ gop->handle = grant_tx_ref[pending_idx];
grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
gop++;
}
- if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
- unmap_ops, gop - unmap_ops))) {
- BUG();
- }
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops));
#else
mcl = tx_mcl;
while ( dc != dp )
@@ -584,7 +573,6 @@
u16 pending_idx;
NETIF_RING_IDX i;
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- gnttab_map_grant_ref_t map_ops[MAX_PENDING_REQS];
gnttab_map_grant_ref_t *mop;
#else
multicall_entry_t *mcl;
@@ -595,7 +583,7 @@
net_tx_action_dealloc();
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- mop = map_ops;
+ mop = tx_map_ops;
#else
mcl = tx_mcl;
#endif
@@ -646,11 +634,7 @@
netif->credit_timeout.expires = next_credit;
netif->credit_timeout.data = (unsigned long)netif;
netif->credit_timeout.function = tx_credit_callback;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
add_timer_on(&netif->credit_timeout, smp_processor_id());
-#else
- add_timer(&netif->credit_timeout);
-#endif
break;
}
}
@@ -700,10 +684,10 @@
/* Packets passed to netif_rx() must have some headroom. */
skb_reserve(skb, 16);
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- mop->host_virt_addr = MMAP_VADDR(pending_idx);
- mop->dom = netif->domid;
- mop->ref = txreq.addr >> PAGE_SHIFT;
- mop->flags = GNTMAP_host_map | GNTMAP_readonly;
+ mop->host_addr = MMAP_VADDR(pending_idx);
+ mop->dom = netif->domid;
+ mop->ref = txreq.addr >> PAGE_SHIFT;
+ mop->flags = GNTMAP_host_map | GNTMAP_readonly;
mop++;
#else
MULTI_update_va_mapping_otherdomain(
@@ -723,7 +707,7 @@
pending_cons++;
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- if ((mop - map_ops) >= ARRAY_SIZE(map_ops))
+ if ( (mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops) )
break;
#else
/* Filled the batch queue? */
@@ -733,20 +717,18 @@
}
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- if (mop == map_ops) {
+ if ( mop == tx_map_ops )
return;
- }
- if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
- map_ops, mop - map_ops))) {
- BUG();
- }
- mop = map_ops;
+
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops));
+
+ mop = tx_map_ops;
#else
if ( mcl == tx_mcl )
return;
- if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
- BUG();
+ BUG_ON(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0);
mcl = tx_mcl;
#endif
@@ -758,7 +740,13 @@
/* Check the remap error code. */
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- if (unlikely(mop->dev_bus_addr == 0)) {
+ /*
+ XXX SMH: error returns from grant operations are pretty poorly
+ specified/thought out, but the below at least conforms with
+ what the rest of the code uses.
+ */
+ if ( unlikely(mop->handle < 0) )
+ {
printk(KERN_ALERT "#### netback grant fails\n");
make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
netif_put(netif);
@@ -768,7 +756,7 @@
continue;
}
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
- FOREIGN_FRAME(mop->dev_bus_addr);
+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT);
grant_tx_ref[pending_idx] = mop->handle;
#else
if ( unlikely(mcl[0].result != 0) )
@@ -887,7 +875,7 @@
static int make_rx_response(netif_t *netif,
u16 id,
s8 st,
- memory_t addr,
+ unsigned long addr,
u16 size,
u16 csum_valid)
{
@@ -966,10 +954,9 @@
net_timer.data = 0;
net_timer.function = net_alarm;
- netif_interface_init();
-
- mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS);
- BUG_ON(mmap_vstart == 0);
+ page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
+ BUG_ON(page == NULL);
+ mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
for ( i = 0; i < MAX_PENDING_REQS; i++ )
{
@@ -986,7 +973,7 @@
spin_lock_init(&net_schedule_list_lock);
INIT_LIST_HEAD(&net_schedule_list);
- netif_ctrlif_init();
+ netif_xenbus_init();
(void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
netif_be_dbg, SA_SHIRQ,
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Aug 25 22:53:20 2005
@@ -48,7 +48,7 @@
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm-xen/evtchn.h>
-#include <asm-xen/ctrl_if.h>
+#include <asm-xen/xenbus.h>
#include <asm-xen/xen-public/io/netif.h>
#include <asm-xen/balloon.h>
#include <asm/page.h>
@@ -59,7 +59,7 @@
#include <asm-xen/gnttab.h>
#ifdef GRANT_DEBUG
static void
-dump_packet(int tag, u32 addr, u32 ap)
+dump_packet(int tag, void *addr, u32 ap)
{
unsigned char *p = (unsigned char *)ap;
int i;
@@ -102,19 +102,23 @@
#endif
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
-static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t gref_tx_head;
static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
#endif
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t gref_rx_head;
static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
#endif
#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
-static domid_t rdomid = 0;
#define GRANT_INVALID_REF (0xFFFF)
#endif
+
+#define NETIF_STATE_DISCONNECTED 0
+#define NETIF_STATE_CONNECTED 1
+
+static unsigned int netif_state = NETIF_STATE_DISCONNECTED;
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);
@@ -133,12 +137,11 @@
#define xennet_proc_delif(d) ((void)0)
#endif
-static struct list_head dev_list;
-
+#define netfront_info net_private
struct net_private
{
struct list_head list;
- struct net_device *dev;
+ struct net_device *netdev;
struct net_device_stats stats;
NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
@@ -152,7 +155,6 @@
unsigned int handle;
unsigned int evtchn;
- unsigned int irq;
/* What is the status of our connection to the remote backend? */
#define BEST_CLOSED 0
@@ -177,6 +179,14 @@
*/
struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
+
+ struct xenbus_device *xbdev;
+ char *backend;
+ int backend_id;
+ struct xenbus_watch watch;
+ int tx_ring_ref;
+ int rx_ring_ref;
+ u8 mac[ETH_ALEN];
};
/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
@@ -188,20 +198,15 @@
(_list)[0] = (_list)[_id]; \
(unsigned short)_id; })
-static char *status_name[] = {
- [NETIF_INTERFACE_STATUS_CLOSED] = "closed",
- [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
- [NETIF_INTERFACE_STATUS_CONNECTED] = "connected",
- [NETIF_INTERFACE_STATUS_CHANGED] = "changed",
-};
-
+#ifdef DEBUG
static char *be_state_name[] = {
[BEST_CLOSED] = "closed",
[BEST_DISCONNECTED] = "disconnected",
[BEST_CONNECTED] = "connected",
};
-
-#if DEBUG
+#endif
+
+#ifdef DEBUG
#define DPRINTK(fmt, args...) \
printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
#else
@@ -211,89 +216,6 @@
printk(KERN_INFO "xen_net: " fmt, ##args)
#define WPRINTK(fmt, args...) \
printk(KERN_WARNING "xen_net: " fmt, ##args)
-
-static struct net_device *find_dev_by_handle(unsigned int handle)
-{
- struct list_head *ent;
- struct net_private *np;
- list_for_each (ent, &dev_list) {
- np = list_entry(ent, struct net_private, list);
- if (np->handle == handle)
- return np->dev;
- }
- return NULL;
-}
-
-/** Network interface info. */
-struct netif_ctrl {
- /** Number of interfaces. */
- int interface_n;
- /** Number of connected interfaces. */
- int connected_n;
- /** Error code. */
- int err;
- int up;
-};
-
-static struct netif_ctrl netctrl;
-
-static void netctrl_init(void)
-{
- memset(&netctrl, 0, sizeof(netctrl));
- netctrl.up = NETIF_DRIVER_STATUS_DOWN;
-}
-
-/** Get or set a network interface error.
- */
-static int netctrl_err(int err)
-{
- if ((err < 0) && !netctrl.err)
- netctrl.err = err;
- return netctrl.err;
-}
-
-/** Test if all network interfaces are connected.
- *
- * @return 1 if all connected, 0 if not, negative error code otherwise
- */
-static int netctrl_connected(void)
-{
- int ok;
-
- if (netctrl.err)
- ok = netctrl.err;
- else if (netctrl.up == NETIF_DRIVER_STATUS_UP)
- ok = (netctrl.connected_n == netctrl.interface_n);
- else
- ok = 0;
-
- return ok;
-}
-
-/** Count the connected network interfaces.
- *
- * @return connected count
- */
-static int netctrl_connected_count(void)
-{
-
- struct list_head *ent;
- struct net_private *np;
- unsigned int connected;
-
- connected = 0;
-
- list_for_each(ent, &dev_list) {
- np = list_entry(ent, struct net_private, list);
- if (np->backend_state == BEST_CONNECTED)
- connected++;
- }
-
- netctrl.connected_n = connected;
- DPRINTK("> connected_n=%d interface_n=%d\n",
- netctrl.connected_n, netctrl.interface_n);
- return connected;
-}
/** Send a packet on a net device to encourage switches to learn the
* MAC. We send a fake ARP request.
@@ -357,10 +279,14 @@
id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
skb = np->tx_skbs[id];
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
- printk(KERN_ALERT "netfront: query foreign access\n");
+ if (unlikely(gnttab_query_foreign_access(grant_tx_ref[id]) != 0)) {
+ /* other domain is still using this grant - shouldn't happen
+ but if it does, we'll try to reclaim the grant later */
+ printk(KERN_ALERT "network_tx_buf_gc: warning -- grant "
+ "still in use by backend domain.\n");
+ goto out;
}
- gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_end_foreign_access_ref(grant_tx_ref[id], GNTMAP_readonly);
gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
grant_tx_ref[id] = GRANT_INVALID_REF;
#endif
@@ -383,6 +309,10 @@
mb();
} while (prod != np->tx->resp_prod);
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ out:
+#endif
+
if (np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE)) {
np->tx_full = 0;
if (np->user_state == UST_OPEN)
@@ -434,16 +364,17 @@
np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- if ((ref = gnttab_claim_grant_reference(&gref_rx_head, gref_rx_terminal)) < 0) {
+ ref = gnttab_claim_grant_reference(&gref_rx_head);
+ if (unlikely(ref < 0)) {
printk(KERN_ALERT "#### netfront can't claim rx reference\n");
BUG();
}
grant_rx_ref[id] = ref;
- gnttab_grant_foreign_transfer_ref(ref, rdomid,
- virt_to_machine(skb->head) >> PAGE_SHIFT);
+ gnttab_grant_foreign_transfer_ref(ref, np->backend_id,
+ virt_to_mfn(skb->head));
np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
#endif
- rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
+ rx_pfn_array[i] = virt_to_mfn(skb->head);
/* Remove this page from pseudo phys map before passing back to Xen. */
phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT]
@@ -529,17 +460,19 @@
tx->id = id;
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) < 0) {
+ ref = gnttab_claim_grant_reference(&gref_tx_head);
+ if (unlikely(ref < 0)) {
printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
BUG();
}
- mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
- gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
- tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+ mfn = virt_to_mfn(skb->data);
+ gnttab_grant_foreign_access_ref(ref, np->backend_id, mfn, GNTMAP_readonly);
+ tx->addr = ref << PAGE_SHIFT;
grant_tx_ref[id] = ref;
#else
- tx->addr = virt_to_machine(skb->data);
-#endif
+ tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
+#endif
+ tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
tx->size = skb->len;
tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
@@ -639,8 +572,7 @@
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
ref = grant_rx_ref[rx->id];
grant_rx_ref[rx->id] = GRANT_INVALID_REF;
-
- mfn = gnttab_end_foreign_transfer(ref);
+ mfn = gnttab_end_foreign_transfer_ref(ref);
gnttab_release_grant_reference(&gref_rx_head, ref);
#endif
@@ -675,18 +607,20 @@
pfn_pte_ma(mfn, PAGE_KERNEL), 0);
#else
MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
- pfn_pte_ma(rx->addr >> PAGE_SHIFT, PAGE_KERNEL), 0);
+ pfn_pte_ma(rx->addr >> PAGE_SHIFT,
+ PAGE_KERNEL), 0);
#endif
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn;
+#else
phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- mfn;
-#else
rx->addr >> PAGE_SHIFT;
#endif
+
#ifdef GRANT_DEBUG
- printk(KERN_ALERT "#### rx_poll enqueue vdata=%08x mfn=%08x ref=%04x\n",
+ printk(KERN_ALERT "#### rx_poll enqueue vdata=%p mfn=%lu ref=%x\n",
skb->data, mfn, ref);
#endif
__skb_queue_tail(&rxq, skb);
@@ -708,9 +642,9 @@
while ((skb = __skb_dequeue(&rxq)) != NULL) {
#ifdef GRANT_DEBUG
- printk(KERN_ALERT "#### rx_poll dequeue vdata=%08x mfn=%08x\n",
- skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
- dump_packet('d', skb->data, (unsigned long)skb->data);
+ printk(KERN_ALERT "#### rx_poll dequeue vdata=%p mfn=%lu\n",
+ skb->data, virt_to_mfn(skb->data));
+ dump_packet('d', skb->data, (unsigned long)skb->data);
#endif
/*
* Enough room in skbuff for the data we were passed? Also, Linux
@@ -797,7 +731,7 @@
{
struct net_private *np = netdev_priv(dev);
np->user_state = UST_CLOSED;
- netif_stop_queue(np->dev);
+ netif_stop_queue(np->netdev);
return 0;
}
@@ -809,8 +743,7 @@
}
-static void network_connect(struct net_device *dev,
- netif_fe_interface_status_t *status)
+static void network_connect(struct net_device *dev)
{
struct net_private *np;
int i, requeue_idx;
@@ -843,18 +776,23 @@
* interface has been down.
*/
for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
- if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
- struct sk_buff *skb = np->tx_skbs[i];
-
- tx = &np->tx->ring[requeue_idx++].req;
-
- tx->id = i;
- tx->addr = virt_to_machine(skb->data);
- tx->size = skb->len;
-
- np->stats.tx_bytes += skb->len;
- np->stats.tx_packets++;
- }
+ if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
+ struct sk_buff *skb = np->tx_skbs[i];
+
+ tx = &np->tx->ring[requeue_idx++].req;
+
+ tx->id = i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ tx->addr = 0; /*(ref << PAGE_SHIFT) |*/
+#else
+ tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
+#endif
+ tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
+ tx->size = skb->len;
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+ }
}
wmb();
np->tx->req_prod = requeue_idx;
@@ -873,7 +811,7 @@
*/
np->backend_state = BEST_CONNECTED;
wmb();
- notify_via_evtchn(status->evtchn);
+ notify_via_evtchn(np->evtchn);
network_tx_buf_gc(dev);
if (np->user_state == UST_OPEN)
@@ -883,132 +821,21 @@
spin_unlock_irq(&np->tx_lock);
}
-static void vif_show(struct net_private *np)
-{
-#if DEBUG
- if (np) {
- IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
- np->handle,
- be_state_name[np->backend_state],
- np->user_state ? "open" : "closed",
- np->evtchn,
- np->irq,
- np->tx,
- np->rx);
- } else {
- IPRINTK("<vif NULL>\n");
- }
-#endif
-}
-
-/* Send a connect message to xend to tell it to bring up the interface. */
-static void send_interface_connect(struct net_private *np)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_NETIF_FE,
- .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
- .length = sizeof(netif_fe_interface_connect_t),
- };
- netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-
- msg->handle = np->handle;
- msg->tx_shmem_frame = (virt_to_machine(np->tx) >> PAGE_SHIFT);
- msg->rx_shmem_frame = (virt_to_machine(np->rx) >> PAGE_SHIFT);
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-/* Send a driver status notification to the domain controller. */
-static int send_driver_status(int ok)
-{
- int err = 0;
- ctrl_msg_t cmsg = {
- .type = CMSG_NETIF_FE,
- .subtype = CMSG_NETIF_FE_DRIVER_STATUS,
- .length = sizeof(netif_fe_driver_status_t),
- };
- netif_fe_driver_status_t *msg = (void*)cmsg.msg;
-
- msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
- err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
- return err;
-}
-
-/* Stop network device and free tx/rx queues and irq.
- */
-static void vif_release(struct net_private *np)
-{
- /* Stop old i/f to prevent errors whilst we rebuild the state. */
- spin_lock_irq(&np->tx_lock);
- spin_lock(&np->rx_lock);
- netif_stop_queue(np->dev);
- /* np->backend_state = BEST_DISCONNECTED; */
- spin_unlock(&np->rx_lock);
- spin_unlock_irq(&np->tx_lock);
-
- /* Free resources. */
- if(np->tx != NULL){
- free_irq(np->irq, np->dev);
- unbind_evtchn_from_irq(np->evtchn);
- free_page((unsigned long)np->tx);
- free_page((unsigned long)np->rx);
- np->irq = 0;
- np->evtchn = 0;
- np->tx = NULL;
- np->rx = NULL;
- }
-}
-
-/* Release vif resources and close it down completely.
- */
-static void vif_close(struct net_private *np)
-{
- WPRINTK("Unexpected netif-CLOSED message in state %s\n",
- be_state_name[np->backend_state]);
- vif_release(np);
- np->backend_state = BEST_CLOSED;
- /* todo: take dev down and free. */
- vif_show(np);
-}
-
-/* Move the vif into disconnected state.
- * Allocates tx/rx pages.
- * Sends connect message to xend.
- */
-static void vif_disconnect(struct net_private *np)
-{
- if(np->tx) free_page((unsigned long)np->tx);
- if(np->rx) free_page((unsigned long)np->rx);
- // Before this np->tx and np->rx had better be null.
- np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
- np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
- memset(np->tx, 0, PAGE_SIZE);
- memset(np->rx, 0, PAGE_SIZE);
- np->backend_state = BEST_DISCONNECTED;
- send_interface_connect(np);
- vif_show(np);
-}
-
-/* Begin interface recovery.
- *
- * NB. Whilst we're recovering, we turn the carrier state off. We
- * take measures to ensure that this device isn't used for
- * anything. We also stop the queue for this device. Various
- * different approaches (e.g. continuing to buffer packets) have
- * been tested but don't appear to improve the overall impact on
- * TCP connections.
- *
- * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
- * is initiated by a special "RESET" message - disconnect could
- * just mean we're not allowed to use this interface any more.
- */
-static void vif_reset(struct net_private *np)
-{
- IPRINTK("Attempting to reconnect network interface: handle=%u\n",
- np->handle);
- vif_release(np);
- vif_disconnect(np);
- vif_show(np);
+static void show_device(struct net_private *np)
+{
+#ifdef DEBUG
+ if (np) {
+ IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
+ np->handle,
+ be_state_name[np->backend_state],
+ np->user_state ? "open" : "closed",
+ np->evtchn,
+ np->tx,
+ np->rx);
+ } else {
+ IPRINTK("<vif NULL>\n");
+ }
+#endif
}
/* Move the vif into connected state.
@@ -1016,26 +843,22 @@
* Binds the irq to the event channel.
*/
static void
-vif_connect(struct net_private *np, netif_fe_interface_status_t *status)
-{
- struct net_device *dev = np->dev;
- memcpy(dev->dev_addr, status->mac, ETH_ALEN);
- network_connect(dev, status);
- np->evtchn = status->evtchn;
- np->irq = bind_evtchn_to_irq(np->evtchn);
-#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
- rdomid = status->domid;
-#endif
- (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
- netctrl_connected_count();
- (void)send_fake_arp(dev);
- vif_show(np);
+connect_device(struct net_private *np, unsigned int evtchn)
+{
+ struct net_device *dev = np->netdev;
+ memcpy(dev->dev_addr, np->mac, ETH_ALEN);
+ np->evtchn = evtchn;
+ network_connect(dev);
+ (void)bind_evtchn_to_irqhandler(
+ np->evtchn, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
+ (void)send_fake_arp(dev);
+ show_device(np);
}
static struct ethtool_ops network_ethtool_ops =
{
- .get_tx_csum = ethtool_op_get_tx_csum,
- .set_tx_csum = ethtool_op_set_tx_csum,
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = ethtool_op_set_tx_csum,
};
/** Create a network device.
@@ -1043,22 +866,24 @@
* @param val return parameter for created device
* @return 0 on success, error code otherwise
*/
-static int create_netdev(int handle, struct net_device **val)
+static int create_netdev(int handle, struct xenbus_device *dev,
+ struct net_device **val)
{
int i, err = 0;
- struct net_device *dev = NULL;
+ struct net_device *netdev = NULL;
struct net_private *np = NULL;
- if ((dev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
+ if ((netdev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
printk(KERN_WARNING "%s> alloc_etherdev failed.\n", __FUNCTION__);
err = -ENOMEM;
goto exit;
}
- np = netdev_priv(dev);
+ np = netdev_priv(netdev);
np->backend_state = BEST_CLOSED;
np->user_state = UST_CLOSED;
np->handle = handle;
+ np->xbdev = dev;
spin_lock_init(&np->tx_lock);
spin_lock_init(&np->rx_lock);
@@ -1082,268 +907,47 @@
#endif
}
- dev->open = network_open;
- dev->hard_start_xmit = network_start_xmit;
- dev->stop = network_close;
- dev->get_stats = network_get_stats;
- dev->poll = netif_poll;
- dev->weight = 64;
- dev->features = NETIF_F_IP_CSUM;
-
- SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
-
- if ((err = register_netdev(dev)) != 0) {
+ netdev->open = network_open;
+ netdev->hard_start_xmit = network_start_xmit;
+ netdev->stop = network_close;
+ netdev->get_stats = network_get_stats;
+ netdev->poll = netif_poll;
+ netdev->weight = 64;
+ netdev->features = NETIF_F_IP_CSUM;
+
+ SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
+
+ if ((err = register_netdev(netdev)) != 0) {
printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
goto exit;
}
- if ((err = xennet_proc_addif(dev)) != 0) {
- unregister_netdev(dev);
+ if ((err = xennet_proc_addif(netdev)) != 0) {
+ unregister_netdev(netdev);
goto exit;
}
- np->dev = dev;
- list_add(&np->list, &dev_list);
+ np->netdev = netdev;
exit:
- if ((err != 0) && (dev != NULL))
- kfree(dev);
+ if ((err != 0) && (netdev != NULL))
+ kfree(netdev);
else if (val != NULL)
- *val = dev;
+ *val = netdev;
return err;
}
-/* Get the target interface for a status message.
- * Creates the interface when it makes sense.
- * The returned interface may be null when there is no error.
- *
- * @param status status message
- * @param np return parameter for interface state
- * @return 0 on success, error code otherwise
- */
-static int
-target_vif(netif_fe_interface_status_t *status, struct net_private **np)
-{
- int err = 0;
- struct net_device *dev;
-
- DPRINTK("> handle=%d\n", status->handle);
- if (status->handle < 0) {
- err = -EINVAL;
- goto exit;
- }
-
- if ((dev = find_dev_by_handle(status->handle)) != NULL)
- goto exit;
-
- if (status->status == NETIF_INTERFACE_STATUS_CLOSED)
- goto exit;
- if (status->status == NETIF_INTERFACE_STATUS_CHANGED)
- goto exit;
-
- /* It's a new interface in a good state - create it. */
- DPRINTK("> create device...\n");
- if ((err = create_netdev(status->handle, &dev)) != 0)
- goto exit;
-
- netctrl.interface_n++;
-
- exit:
- if (np != NULL)
- *np = ((dev && !err) ? netdev_priv(dev) : NULL);
- DPRINTK("< err=%d\n", err);
- return err;
-}
-
-/* Handle an interface status message. */
-static void netif_interface_status(netif_fe_interface_status_t *status)
-{
- int err = 0;
- struct net_private *np = NULL;
-
- DPRINTK("> status=%s handle=%d\n",
- status_name[status->status], status->handle);
-
- if ((err = target_vif(status, &np)) != 0) {
- WPRINTK("Invalid netif: handle=%u\n", status->handle);
- return;
- }
-
- if (np == NULL) {
- DPRINTK("> no vif\n");
- return;
- }
-
- switch (status->status) {
- case NETIF_INTERFACE_STATUS_CLOSED:
- switch (np->backend_state) {
- case BEST_CLOSED:
- case BEST_DISCONNECTED:
- case BEST_CONNECTED:
- vif_close(np);
- break;
- }
- break;
-
- case NETIF_INTERFACE_STATUS_DISCONNECTED:
- switch (np->backend_state) {
- case BEST_CLOSED:
- vif_disconnect(np);
- break;
- case BEST_DISCONNECTED:
- case BEST_CONNECTED:
- vif_reset(np);
- break;
- }
- break;
-
- case NETIF_INTERFACE_STATUS_CONNECTED:
- switch (np->backend_state) {
- case BEST_CLOSED:
- WPRINTK("Unexpected netif status %s in state %s\n",
- status_name[status->status],
- be_state_name[np->backend_state]);
- vif_disconnect(np);
- vif_connect(np, status);
- break;
- case BEST_DISCONNECTED:
- vif_connect(np, status);
- break;
- }
- break;
-
- case NETIF_INTERFACE_STATUS_CHANGED:
- /*
- * The domain controller is notifying us that a device has been
- * added or removed.
- */
- break;
-
- default:
- WPRINTK("Invalid netif status code %d\n", status->status);
- break;
- }
-
- vif_show(np);
-}
-
-/*
- * Initialize the network control interface.
- */
-static void netif_driver_status(netif_fe_driver_status_t *status)
-{
- netctrl.up = status->status;
- netctrl_connected_count();
-}
-
-/* Receive handler for control messages. */
-static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-
- switch (msg->subtype) {
- case CMSG_NETIF_FE_INTERFACE_STATUS:
- netif_interface_status((netif_fe_interface_status_t *) &msg->msg[0]);
- break;
-
- case CMSG_NETIF_FE_DRIVER_STATUS:
- netif_driver_status((netif_fe_driver_status_t *) &msg->msg[0]);
- break;
-
- default:
- msg->length = 0;
- break;
- }
-
- ctrl_if_send_response(msg);
-}
-
-
-#if 1
-/* Wait for all interfaces to be connected.
- *
- * This works OK, but we'd like to use the probing mode (see below).
- */
-static int probe_interfaces(void)
-{
- int err = 0, conn = 0;
- int wait_i, wait_n = 100;
-
- DPRINTK(">\n");
-
- for (wait_i = 0; wait_i < wait_n; wait_i++) {
- DPRINTK("> wait_i=%d\n", wait_i);
- conn = netctrl_connected();
- if(conn) break;
- DPRINTK("> schedule_timeout...\n");
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(10);
- }
-
- DPRINTK("> wait finished...\n");
- if (conn <= 0) {
- err = netctrl_err(-ENETDOWN);
- WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err);
- }
-
- DPRINTK("< err=%d\n", err);
-
- return err;
-}
-#else
-/* Probe for interfaces until no more are found.
- *
- * This is the mode we'd like to use, but at the moment it panics the kernel.
-*/
-static int probe_interfaces(void)
-{
- int err = 0;
- int wait_i, wait_n = 100;
- ctrl_msg_t cmsg = {
- .type = CMSG_NETIF_FE,
- .subtype = CMSG_NETIF_FE_INTERFACE_STATUS,
- .length = sizeof(netif_fe_interface_status_t),
- };
- netif_fe_interface_status_t msg = {};
- ctrl_msg_t rmsg = {};
- netif_fe_interface_status_t *reply = (void*)rmsg.msg;
- int state = TASK_UNINTERRUPTIBLE;
- u32 query = -1;
-
- DPRINTK(">\n");
-
- netctrl.interface_n = 0;
- for (wait_i = 0; wait_i < wait_n; wait_i++) {
- DPRINTK("> wait_i=%d query=%d\n", wait_i, query);
- msg.handle = query;
- memcpy(cmsg.msg, &msg, sizeof(msg));
- DPRINTK("> set_current_state...\n");
- set_current_state(state);
- DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
- DPRINTK("> sending...\n");
- err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state);
- DPRINTK("> err=%d\n", err);
- if(err) goto exit;
- DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
- if((int)reply->handle < 0) {
- // No more interfaces.
- break;
- }
- query = -reply->handle - 2;
- DPRINTK(">netif_interface_status ...\n");
- netif_interface_status(reply);
- }
-
- exit:
- if (err) {
- err = netctrl_err(-ENETDOWN);
- WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err);
- }
-
- DPRINTK("< err=%d\n", err);
- return err;
-}
-
-#endif
+static int destroy_netdev(struct net_device *netdev)
+{
+
+#ifdef CONFIG_PROC_FS
+ xennet_proc_delif(netdev);
+#endif
+
+ unregister_netdev(netdev);
+
+ return 0;
+}
/*
* We use this notifier to send out a fake ARP reply to reset switches and
@@ -1354,19 +958,11 @@
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct net_device *dev = ifa->ifa_dev->dev;
- struct list_head *ent;
- struct net_private *np;
-
- if (event != NETDEV_UP)
- goto out;
-
- list_for_each (ent, &dev_list) {
- np = list_entry(ent, struct net_private, list);
- if (np->dev == dev)
- (void)send_fake_arp(dev);
- }
+
+ /* UP event and is it one of our devices? */
+ if (event == NETDEV_UP && dev->open == network_open)
+ (void)send_fake_arp(dev);
- out:
return NOTIFY_DONE;
}
@@ -1376,66 +972,315 @@
.priority = 0
};
-static int __init netif_init(void)
-{
- int err = 0;
-
- if (xen_start_info.flags & SIF_INITDOMAIN)
- return 0;
+static struct xenbus_device_id netfront_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+static void watch_for_status(struct xenbus_watch *watch, const char *node)
+{
+}
+
+static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
+{
+ evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
+ int err;
+
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
- &gref_tx_head, &gref_tx_terminal) < 0) {
- printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
- return 1;
- }
- printk(KERN_ALERT "#### netfront tx using grant tables\n");
+ info->tx_ring_ref = GRANT_INVALID_REF;
#endif
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
- &gref_rx_head, &gref_rx_terminal) < 0) {
- printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
- return 1;
- }
- printk(KERN_ALERT "#### netfront rx using grant tables\n");
-#endif
-
- if ((err = xennet_proc_init()) != 0)
- return err;
-
- IPRINTK("Initialising virtual ethernet driver.\n");
- INIT_LIST_HEAD(&dev_list);
- (void)register_inetaddr_notifier(&notifier_inetdev);
- netctrl_init();
- (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
- send_driver_status(1);
- err = probe_interfaces();
- if (err)
- ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);
-
- DPRINTK("< err=%d\n", err);
- return err;
-}
-
-static void netif_exit(void)
-{
+ info->rx_ring_ref = GRANT_INVALID_REF;
+#endif
+
+ info->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
+ if (info->tx == 0) {
+ err = -ENOMEM;
+ xenbus_dev_error(dev, err, "allocating tx ring page");
+ goto out;
+ }
+ info->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
+ if (info->rx == 0) {
+ err = -ENOMEM;
+ xenbus_dev_error(dev, err, "allocating rx ring page");
+ goto out;
+ }
+ memset(info->tx, 0, PAGE_SIZE);
+ memset(info->rx, 0, PAGE_SIZE);
+ info->backend_state = BEST_DISCONNECTED;
+
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
-#endif
+ err = gnttab_grant_foreign_access(info->backend_id,
+ virt_to_mfn(info->tx), 0);
+ if (err < 0) {
+ xenbus_dev_error(dev, err, "granting access to tx ring page");
+ goto out;
+ }
+ info->tx_ring_ref = err;
+#else
+ info->tx_ring_ref = virt_to_mfn(info->tx);
+#endif
+
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
-#endif
-}
-
-static void vif_suspend(struct net_private *np)
-{
+ err = gnttab_grant_foreign_access(info->backend_id,
+ virt_to_mfn(info->rx), 0);
+ if (err < 0) {
+ xenbus_dev_error(dev, err, "granting access to rx ring page");
+ goto out;
+ }
+ info->rx_ring_ref = err;
+#else
+ info->rx_ring_ref = virt_to_mfn(info->rx);
+#endif
+
+ op.u.alloc_unbound.dom = info->backend_id;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ xenbus_dev_error(dev, err, "allocating event channel");
+ goto out;
+ }
+ connect_device(info, op.u.alloc_unbound.port);
+ return 0;
+
+ out:
+ if (info->tx)
+ free_page((unsigned long)info->tx);
+ info->tx = 0;
+ if (info->rx)
+ free_page((unsigned long)info->rx);
+ info->rx = 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (info->tx_ring_ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(info->tx_ring_ref, 0);
+ info->tx_ring_ref = GRANT_INVALID_REF;
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (info->rx_ring_ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(info->rx_ring_ref, 0);
+ info->rx_ring_ref = GRANT_INVALID_REF;
+#endif
+ return err;
+}
+
+static void netif_free(struct netfront_info *info)
+{
+ unbind_evtchn_from_irqhandler(info->evtchn, info->netdev); /* quiesce first */
+ info->evtchn = 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX /* revoke grants before freeing the pages */
+ if (info->tx_ring_ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(info->tx_ring_ref, 0);
+ info->tx_ring_ref = GRANT_INVALID_REF;
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (info->rx_ring_ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(info->rx_ring_ref, 0);
+ info->rx_ring_ref = GRANT_INVALID_REF;
+#endif
+ if (info->tx) /* ring pages go last: nothing references them any more */
+ free_page((unsigned long)info->tx);
+ info->tx = 0;
+ if (info->rx)
+ free_page((unsigned long)info->rx);
+ info->rx = 0;
+}
+
+/* Stop network device and free tx/rx queues and irq.
+ * The queue is stopped with tx_lock (irqs off) and rx_lock both held. */
+static void shutdown_device(struct net_private *np)
+{
+ /* Stop old i/f to prevent errors whilst we rebuild the state. */
+ spin_lock_irq(&np->tx_lock);
+ spin_lock(&np->rx_lock);
+ netif_stop_queue(np->netdev);
+ /* np->backend_state = BEST_DISCONNECTED; */
+ spin_unlock(&np->rx_lock);
+ spin_unlock_irq(&np->tx_lock);
+
+ /* Free resources. */
+ netif_free(np);
+}
+
+/* Common code used when first setting up, and when resuming: read backend and
+ * MAC from the store, set up the rings, publish them. Returns 0 or an error. */
+static int talk_to_backend(struct xenbus_device *dev,
+ struct netfront_info *info)
+{
+ char *backend, *mac, *e, *s;
+ const char *message;
+ int err, i;
+
+ backend = NULL;
+ err = xenbus_gather(dev->nodename,
+ "backend-id", "%i", &info->backend_id,
+ "backend", NULL, &backend,
+ NULL);
+ if (XENBUS_EXIST_ERR(err))
+ goto out;
+ if (backend && strlen(backend) == 0) {
+ err = -ENOENT;
+ goto out;
+ }
+ if (err < 0) {
+ xenbus_dev_error(dev, err, "reading %s/backend or backend-id",
+ dev->nodename);
+ goto out;
+ }
+
+ mac = xenbus_read(dev->nodename, "mac", NULL);
+ if (IS_ERR(mac)) {
+ err = PTR_ERR(mac);
+ xenbus_dev_error(dev, err, "reading %s/mac",
+ dev->nodename);
+ goto out;
+ }
+ s = mac;
+ for (i = 0; i < ETH_ALEN; i++) {
+ info->mac[i] = simple_strtoul(s, &e, 16);
+ /* ':' must follow every byte but the last, which ends the string. */
+ if (s == e || e[0] != ((i == ETH_ALEN - 1) ? '\0' : ':')) {
+ kfree(mac);
+ err = -ENOENT;
+ xenbus_dev_error(dev, err, "parsing %s/mac",
+ dev->nodename);
+ goto out;
+ }
+ s = &e[1];
+ }
+ kfree(mac);
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_device(dev, info);
+ if (err) {
+ xenbus_dev_error(dev, err, "setting up ring");
+ goto out;
+ }
+
+ err = xenbus_transaction_start(dev->nodename);
+ if (err) {
+ xenbus_dev_error(dev, err, "starting transaction");
+ goto destroy_ring;
+ }
+
+ err = xenbus_printf(dev->nodename, "tx-ring-ref","%u",
+ info->tx_ring_ref);
+ if (err) {
+ message = "writing tx ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(dev->nodename, "rx-ring-ref","%u",
+ info->rx_ring_ref);
+ if (err) {
+ message = "writing rx ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(dev->nodename,
+ "event-channel", "%u", info->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+
+ /* info takes ownership of the backend path only once everything worked. */
+ info->watch.node = backend;
+ info->watch.callback = watch_for_status;
+ err = register_xenbus_watch(&info->watch);
+ if (err) {
+ message = "registering watch on backend";
+ goto abort_transaction;
+ }
+ err = xenbus_transaction_end(0);
+ if (err) {
+ unregister_xenbus_watch(&info->watch);
+ xenbus_dev_error(dev, err, "completing transaction");
+ goto destroy_ring;
+ }
+ info->backend = backend;
+ backend = NULL;
+ netif_state = NETIF_STATE_CONNECTED;
+ out:
+ if (backend)
+ kfree(backend);
+ return err;
+
+ abort_transaction:
+ xenbus_transaction_end(1);
+ /* Have to do this *outside* transaction. */
+ xenbus_dev_error(dev, err, "%s", message);
+ destroy_ring:
+ shutdown_device(info);
+ goto out;
+}
+
+/* Probe one device: read its "handle", create the net device, then place an
+ event channel and shared frame entries in the store (talk_to_backend).
+ We watch backend to wait if it's ok.
+ Returns 0 on success, otherwise the error of the step that failed. */
+static int netfront_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err;
+ struct net_device *netdev;
+ struct netfront_info *info;
+ unsigned int handle;
+
+ err = xenbus_scanf(dev->nodename, "handle", "%u", &handle);
+ if (XENBUS_EXIST_ERR(err))
+ return err;
+ if (err < 0) {
+ xenbus_dev_error(dev, err, "reading handle");
+ return err;
+ }
+
+ err = create_netdev(handle, dev, &netdev);
+ if (err) {
+ xenbus_dev_error(dev, err, "creating netdev");
+ return err;
+ }
+
+ info = netdev_priv(netdev);
+ dev->data = info;
+
+ err = talk_to_backend(dev, info);
+ if (err) {
+ destroy_netdev(netdev);
+ /* A net_device is released with free_netdev(), never plain kfree(). */
+ free_netdev(netdev);
+ dev->data = NULL;
+ return err;
+ }
+
+ /* Call once in case entries already there. */
+ watch_for_status(&info->watch, info->watch.node);
+
+ return 0;
+}
+
+static int netfront_remove(struct xenbus_device *dev)
+{
+ struct netfront_info *info = dev->data; /* netdev_priv() of info->netdev */
+ /* Undo netfront_probe(): watch, registration, rings, then the device. */
+ if (info->backend)
+ unregister_xenbus_watch(&info->watch);
+ destroy_netdev(info->netdev);
+ netif_free(info);
+ kfree(info->backend);
+ dev->data = NULL;
+ /* info is embedded in the net_device: free the device, never kfree(info). */
+ free_netdev(info->netdev);
+ return 0;
+}
+
+static int netfront_suspend(struct xenbus_device *dev)
+{
+ struct net_private *np = dev->data;
/* Avoid having tx/rx stuff happen until we're ready. */
- free_irq(np->irq, np->dev);
- unbind_evtchn_from_irq(np->evtchn);
-}
-
-static void vif_resume(struct net_private *np)
-{
+ unbind_evtchn_from_irqhandler(np->evtchn, np->netdev);
+ return 0;
+}
+
+static int netfront_resume(struct xenbus_device *dev)
+{
+ struct net_private *np = dev->data;
/*
* Connect regardless of whether IFF_UP flag set.
* Stop bad things from happening until we're back up.
@@ -1444,29 +1289,96 @@
memset(np->tx, 0, PAGE_SIZE);
memset(np->rx, 0, PAGE_SIZE);
- send_interface_connect(np);
-}
-
-void netif_suspend(void)
-{
- struct list_head *ent;
- struct net_private *np;
-
- list_for_each (ent, &dev_list) {
- np = list_entry(ent, struct net_private, list);
- vif_suspend(np);
- }
-}
-
-void netif_resume(void)
-{
- struct list_head *ent;
- struct net_private *np;
-
- list_for_each (ent, &dev_list) {
- np = list_entry(ent, struct net_private, list);
- vif_resume(np);
- }
+ // send_interface_connect(np);
+ return 0;
+}
+
+static struct xenbus_driver netfront = {
+ .name = "vif",
+ .owner = THIS_MODULE,
+ .ids = netfront_ids,
+ .probe = netfront_probe,
+ .remove = netfront_remove,
+ .resume = netfront_resume,
+ .suspend = netfront_suspend,
+};
+
+static void __init init_net_xenbus(void)
+{
+ xenbus_register_device(&netfront);
+}
+
+/* Poll (up to 10*HZ one-tick sleeps) for NETIF_STATE_CONNECTED; 0 or -ENOSYS. */
+static int wait_for_netif(void)
+{
+ int err = 0;
+ int i;
+ /*
+ * We should figure out how many and which devices we need to
+ * proceed and only wait for those. For now, continue once the
+ * first device is around.
+ */
+ for ( i=0; netif_state != NETIF_STATE_CONNECTED && (i < 10*HZ); i++ )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ if (netif_state != NETIF_STATE_CONNECTED) {
+ WPRINTK("Timeout connecting to device!\n");
+ err = -ENOSYS;
+ }
+ return err;
+}
+
+/* Driver init: a no-op in the initial domain; otherwise reserve grant
+ * references, set up /proc and register with xenbus. Returns 0 on success. */
+static int __init netif_init(void)
+{
+ int err = 0;
+
+ if (xen_start_info.flags & SIF_INITDOMAIN)
+ return 0;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ /* A grant for every ring slot */
+ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+ &gref_tx_head) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+ return -ENOMEM;
+ }
+ printk(KERN_ALERT "Netdev frontend (TX) is using grant tables.\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ /* A grant for every ring slot */
+ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+ &gref_rx_head) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+ return -ENOMEM;
+ }
+ printk(KERN_ALERT "Netdev frontend (RX) is using grant tables.\n");
+#endif
+
+ if ((err = xennet_proc_init()) != 0)
+ return err;
+
+ IPRINTK("Initialising virtual ethernet driver.\n");
+ (void)register_inetaddr_notifier(&notifier_inetdev);
+ init_net_xenbus();
+ /* Best effort: a timeout is logged by wait_for_netif() but is not fatal. */
+ wait_for_netif();
+
+ return err;
+}
+
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_free_grant_references(gref_tx_head);
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_free_grant_references(gref_rx_head);
+#endif
}
#ifdef CONFIG_PROC_FS
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Thu Aug 25 22:53:20 2005
@@ -139,7 +139,7 @@
privcmd_mmapbatch_t m;
struct vm_area_struct *vma = NULL;
unsigned long *p, addr;
- unsigned long mfn;
+ unsigned long mfn, ptep;
int i;
if ( copy_from_user(&m, (void *)data, sizeof(m)) )
@@ -163,12 +163,12 @@
if ( get_user(mfn, p) )
return -EFAULT;
- u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
-
- __direct_remap_area_pages(vma->vm_mm,
- addr,
- PAGE_SIZE,
- &u);
+ ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
+ if (ret)
+ goto batch_err;
+
+ u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
+ u.ptr = ptep;
if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
put_user(0xF0000000 | mfn, p);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Thu Aug 25 22:53:20 2005
@@ -37,7 +37,6 @@
/* Physical parameters of the comms window. */
unsigned long shmem_frame;
unsigned int evtchn;
- int irq;
/* Comms Information */
usbif_back_ring_t usb_ring;
/* Private fields. */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c Thu Aug 25 22:53:20 2005
@@ -6,15 +6,6 @@
* by Mark Williamson, Copyright (c) 2004
*/
-
-/******************************************************************************
- * arch/xen/drivers/blkif/backend/interface.c
- *
- * Block-device interface management.
- *
- * Copyright (c) 2004, Keir Fraser
- */
-
#include "common.h"
#define USBIF_HASHSZ 1024
@@ -42,7 +33,6 @@
* may be outstanding requests at the device whose asynchronous responses
* must still be notified to the remote driver.
*/
- unbind_evtchn_from_irq(usbif->evtchn);
vfree(usbif->usb_ring.sring);
/* Construct the deferred response message. */
@@ -198,12 +188,12 @@
BACK_RING_INIT(&up->usb_ring, sring, PAGE_SIZE);
up->evtchn = evtchn;
- up->irq = bind_evtchn_to_irq(evtchn);
up->shmem_frame = shmem_frame;
up->status = CONNECTED;
usbif_get(up);
- request_irq(up->irq, usbif_be_int, 0, "usbif-backend", up);
+ (void)bind_evtchn_to_irqhandler(
+ evtchn, usbif_be_int, 0, "usbif-backend", up);
connect->status = USBIF_BE_STATUS_OKAY;
}
@@ -233,7 +223,7 @@
up->status = DISCONNECTING;
up->disconnect_rspid = rsp_id;
wmb(); /* Let other CPUs see the status change. */
- free_irq(up->irq, up);
+ unbind_evtchn_from_irqhandler(up->evtchn, up);
usbif_deschedule(up);
usbif_put(up);
return 0; /* Caller should not send response message. */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Thu Aug 25 22:53:20 2005
@@ -657,8 +657,8 @@
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
- ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i))
- == buffer_mach + i << PAGE_SHIFT);
+ ASSERT(virt_to_mfn(MMAP_VADDR(pending_idx, i))
+ == ((buffer_mach >> PAGE_SHIFT) + i));
}
if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
@@ -1027,13 +1027,15 @@
static int __init usbif_init(void)
{
int i;
+ struct page *page;
if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
!(xen_start_info.flags & SIF_USB_BE_DOMAIN) )
return 0;
-
- if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
- BUG();
+
+ page = balloon_alloc_empty_page_range(MMAP_PAGES);
+ BUG_ON(page == NULL);
+ mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
pending_cons = 0;
pending_prod = MAX_PENDING_REQS;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c Thu Aug 25 22:53:20 2005
@@ -195,7 +195,7 @@
}
urb_priv->schedule = schedule;
- req->iso_schedule = virt_to_machine(schedule);
+ req->iso_schedule = virt_to_mfn(schedule) << PAGE_SHIFT;
return 0;
}
@@ -212,7 +212,7 @@
#if DEBUG
printk(KERN_DEBUG
"usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n",
- usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod),
+ usbif, usbif->req_prod, virt_to_mfn(&usbif->req_prod),
usbif->resp_prod, xhci->usb_resp_cons);
#endif
@@ -232,7 +232,7 @@
req->operation = USBIF_OP_IO;
req->port = 0; /* We don't care what the port is. */
req->id = (unsigned long) urb->hcpriv;
- req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
+ req->transfer_buffer = virt_to_mfn(urb->transfer_buffer) << PAGE_SHIFT;
req->devnum = usb_pipedevice(urb->pipe);
req->direction = usb_pipein(urb->pipe);
req->speed = usb_pipeslow(urb->pipe);
@@ -280,7 +280,7 @@
printk(KERN_DEBUG
"queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
"resp_cons = %d\n", usbif->req_prod,
- virt_to_machine(&usbif->req_prod),
+ virt_to_mfn(&usbif->req_prod),
usbif->resp_prod, xhci->usb_resp_cons);
#endif
@@ -1536,8 +1536,7 @@
/* Clean up resources. */
free_page((unsigned long)xhci->usb_ring.sring);
- free_irq(xhci->irq, xhci);
- unbind_evtchn_from_irq(xhci->evtchn);
+ unbind_evtchn_from_irqhandler(xhci->evtchn, xhci);
/* Plug the ring. */
xhci->recovery = 1;
@@ -1556,7 +1555,7 @@
cmsg.type = CMSG_USBIF_FE;
cmsg.subtype = CMSG_USBIF_FE_INTERFACE_CONNECT;
cmsg.length = sizeof(usbif_fe_interface_connect_t);
- up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT;
+ up.shmem_frame = virt_to_mfn(sring);
memcpy(cmsg.msg, &up, sizeof(up));
/* Tell the controller to bring up the interface. */
@@ -1572,7 +1571,6 @@
}
xhci->evtchn = status->evtchn;
- xhci->irq = bind_evtchn_to_irq(xhci->evtchn);
xhci->bandwidth = status->bandwidth;
xhci->rh.numports = status->num_ports;
@@ -1595,14 +1593,14 @@
usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb,
1000 - xhci->bandwidth, 0);
- if ( (rc = request_irq(xhci->irq, xhci_interrupt,
+ if ( (rc = bind_evtchn_to_irqhandler(xhci->evtchn, xhci_interrupt,
SA_SAMPLE_RANDOM, "usbif", xhci)) )
printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc);
DPRINTK(KERN_INFO __FILE__
- ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d IRQ %d\n",
- xhci->usb_ring.sring, virt_to_machine(xhci->usbif),
- xhci->evtchn, xhci->irq);
+ ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d\n",
+ xhci->usb_ring.sring, virt_to_mfn(xhci->usbif),
+ xhci->evtchn);
xhci->state = USBIF_STATE_CONNECTED;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h Thu Aug 25 22:53:20 2005
@@ -54,7 +54,6 @@
#endif
int evtchn; /* Interdom channel to backend */
- int irq; /* Bound to evtchn */
enum {
USBIF_STATE_CONNECTED = 2,
USBIF_STATE_DISCONNECTED = 1,
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Thu Aug 25 22:53:20 2005
@@ -4,7 +4,3 @@
xenbus-objs += xenbus_comms.o
xenbus-objs += xenbus_xs.o
xenbus-objs += xenbus_probe.o
-
-XEN_TOOLS_DIR := "../tools"
-vpath %.h $(XEN_TOOLS_DIR)
-EXTRA_CFLAGS += -I $(XEN_TOOLS_DIR)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Thu Aug 25 22:53:20 2005
@@ -26,7 +26,6 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-//#define DEBUG
#include <asm-xen/hypervisor.h>
#include <asm-xen/evtchn.h>
@@ -49,13 +48,12 @@
static inline struct ringbuf_head *outbuf(void)
{
- return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT);
+ return mfn_to_virt(xen_start_info.store_mfn);
}
static inline struct ringbuf_head *inbuf(void)
{
- return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT)
- + PAGE_SIZE/2;
+ return mfn_to_virt(xen_start_info.store_mfn) + PAGE_SIZE/2;
}
static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
@@ -202,14 +200,17 @@
return 0;
}
-/* Set up interrpt handler off store event channel. */
+/* Set up interrupt handler off store event channel. */
int xb_init_comms(void)
{
- int err, irq;
-
- irq = bind_evtchn_to_irq(xen_start_info.store_evtchn);
-
- err = request_irq(irq, wake_waiting, SA_SHIRQ, "xenbus", &xb_waitq);
+ int err;
+
+ if (!xen_start_info.store_evtchn)
+ return 0;
+
+ err = bind_evtchn_to_irqhandler(
+ xen_start_info.store_evtchn, wake_waiting,
+ 0, "xenbus", &xb_waitq);
if (err) {
printk(KERN_ERR "XENBUS request irq failed %i\n", err);
unbind_evtchn_from_irq(xen_start_info.store_evtchn);
@@ -217,8 +218,16 @@
}
/* FIXME zero out page -- domain builder should probably do this*/
- memset(machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT),
- 0, PAGE_SIZE);
+ memset(mfn_to_virt(xen_start_info.store_mfn), 0, PAGE_SIZE);
return 0;
}
+
+/* Unbind the store event channel handler; no-op when no channel is set. */
+void xb_suspend_comms(void)
+{
+ if (!xen_start_info.store_evtchn)
+ return;
+
+ unbind_evtchn_from_irqhandler(xen_start_info.store_evtchn, &xb_waitq);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Thu Aug 25 22:53:20 2005
@@ -1,8 +1,36 @@
-/* Private include for xenbus communications. */
+/*
+ * Private include for xenbus communications.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
#ifndef _XENBUS_COMMS_H
#define _XENBUS_COMMS_H
+
int xs_init(void);
int xb_init_comms(void);
+void xb_suspend_comms(void);
/* Low level routines. */
int xb_write(const void *data, unsigned len);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Thu Aug 25 22:53:20 2005
@@ -29,30 +29,26 @@
#include <asm-xen/hypervisor.h>
#include <asm-xen/xenbus.h>
+#include <asm-xen/balloon.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/fcntl.h>
#include <stdarg.h>
+#include <linux/notifier.h>
#include "xenbus_comms.h"
#define streq(a, b) (strcmp((a), (b)) == 0)
+
+static struct notifier_block *xenstore_chain;
/* If something in array of ids matches this device, return it. */
static const struct xenbus_device_id *
match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
{
for (; !streq(arr->devicetype, ""); arr++) {
- if (!streq(arr->devicetype, dev->devicetype))
- continue;
-
- /* If they don't care what subtype, it's a match. */
- if (streq(arr->subtype, ""))
- return arr;
-
- /* If they care, device must have (same) subtype. */
- if (dev->subtype && streq(arr->subtype, dev->subtype))
+ if (streq(arr->devicetype, dev->devicetype))
return arr;
}
return NULL;
@@ -68,10 +64,102 @@
return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
}
+struct xen_bus_type
+{
+ char *root;
+ unsigned int levels;
+ int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+ int (*probe)(const char *type, const char *dir);
+ struct bus_type bus;
+ struct device dev;
+};
+
+/* device/<type>/<id> => <type>-<id>; returns 0, or -EINVAL if malformed. */
+static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+ const char *rest = strchr(nodename, '/'); /* what follows the first '/' */
+ if (!rest || strlen(rest + 1) >= BUS_ID_SIZE) {
+ printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
+ return -EINVAL;
+ }
+
+ strlcpy(bus_id, rest + 1, BUS_ID_SIZE);
+ if (!strchr(bus_id, '/')) {
+ printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
+ return -EINVAL;
+ }
+ *strchr(bus_id, '/') = '-';
+ return 0;
+}
+
/* Bus type for frontend drivers. */
-static struct bus_type xenbus_type = {
- .name = "xenbus",
- .match = xenbus_match,
+static int xenbus_probe_frontend(const char *type, const char *name);
+static struct xen_bus_type xenbus_frontend = {
+ .root = "device",
+ .levels = 2, /* device/type/<id> */
+ .get_bus_id = frontend_bus_id,
+ .probe = xenbus_probe_frontend,
+ .bus = {
+ .name = "xen",
+ .match = xenbus_match,
+ },
+ .dev = {
+ .bus_id = "xen",
+ },
+};
+
+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+ int domid, err;
+ const char *devid, *type, *frontend;
+ unsigned int typelen;
+
+ type = strchr(nodename, '/');
+ if (!type)
+ return -EINVAL;
+ type++;
+ typelen = strcspn(type, "/");
+ if (!typelen || type[typelen] != '/')
+ return -EINVAL;
+
+ devid = strrchr(nodename, '/') + 1;
+
+ err = xenbus_gather(nodename, "frontend-id", "%i", &domid,
+ "frontend", NULL, &frontend,
+ NULL);
+ if (err)
+ return err;
+ if (strlen(frontend) == 0)
+ err = -ERANGE;
+
+ if (!err && !xenbus_exists(frontend, ""))
+ err = -ENOENT;
+
+ /* frontend is only needed for the checks above: free it on every path. */
+ kfree(frontend);
+ if (err)
+ return err;
+
+ if (snprintf(bus_id, BUS_ID_SIZE,
+ "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
+ return -ENOSPC;
+ return 0;
+}
+
+static int xenbus_probe_backend(const char *type, const char *uuid);
+static struct xen_bus_type xenbus_backend = {
+ .root = "backend",
+ .levels = 3, /* backend/type/<frontend>/<id> */
+ .get_bus_id = backend_bus_id,
+ .probe = xenbus_probe_backend,
+ .bus = {
+ .name = "xen-backend",
+ .match = xenbus_match,
+ },
+ .dev = {
+ .bus_id = "xen-backend",
+ },
};
static int xenbus_dev_probe(struct device *_dev)
@@ -100,12 +188,13 @@
return drv->remove(dev);
}
-int xenbus_register_driver(struct xenbus_driver *drv)
+static int xenbus_register_driver(struct xenbus_driver *drv,
+ struct xen_bus_type *bus)
{
int err;
drv->driver.name = drv->name;
- drv->driver.bus = &xenbus_type;
+ drv->driver.bus = &bus->bus;
drv->driver.owner = drv->owner;
drv->driver.probe = xenbus_dev_probe;
drv->driver.remove = xenbus_dev_remove;
@@ -116,6 +205,16 @@
return err;
}
+int xenbus_register_device(struct xenbus_driver *drv)
+{
+ return xenbus_register_driver(drv, &xenbus_frontend);
+}
+
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+ return xenbus_register_driver(drv, &xenbus_backend);
+}
+
void xenbus_unregister_driver(struct xenbus_driver *drv)
{
down(&xenbus_lock);
@@ -126,52 +225,98 @@
struct xb_find_info
{
struct xenbus_device *dev;
- const char *busid;
+ const char *nodename;
};
static int cmp_dev(struct device *dev, void *data)
{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
struct xb_find_info *info = data;
- if (streq(dev->bus_id, info->busid)) {
- info->dev = container_of(get_device(dev),
- struct xenbus_device, dev);
+ if (streq(xendev->nodename, info->nodename)) {
+ info->dev = xendev;
+ get_device(dev);
return 1;
}
return 0;
}
-/* FIXME: device_find is fixed in 2.6.13-rc2 according to Greg KH --RR */
-struct xenbus_device *xenbus_device_find(const char *busid)
-{
- struct xb_find_info info = { .dev = NULL, .busid = busid };
-
- bus_for_each_dev(&xenbus_type, NULL, &info, cmp_dev);
+struct xenbus_device *xenbus_device_find(const char *nodename,
+ struct bus_type *bus)
+{
+ struct xb_find_info info = { .dev = NULL, .nodename = nodename };
+
+ bus_for_each_dev(bus, NULL, &info, cmp_dev);
return info.dev;
}
+static int cleanup_dev(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct xb_find_info *info = data;
+ int len = strlen(info->nodename);
+ /* Take (with a reference) the node itself or one below it, not "<path>X". */
+ if (strncmp(xendev->nodename, info->nodename, len) ||
+ (xendev->nodename[len] != '\0' && xendev->nodename[len] != '/'))
+ return 0;
+ info->dev = xendev;
+ get_device(dev);
+ return 1;
+}
+
+static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
+{
+ struct xb_find_info info = { .nodename = path };
+ /* Unregister matching devices one per scan until a scan finds none. */
+ do {
+ info.dev = NULL;
+ bus_for_each_dev(bus, NULL, &info, cleanup_dev);
+ if (info.dev) {
+ device_unregister(&info.dev->dev);
+ put_device(&info.dev->dev); /* ref taken by cleanup_dev() */
+ }
+ } while (info.dev);
+}
static void xenbus_release_device(struct device *dev)
{
if (dev) {
struct xenbus_device *xendev = to_xenbus_device(dev);
- kfree(xendev->subtype);
kfree(xendev);
}
}
-/* devices/<typename>/<name> */
-static int xenbus_probe_device(const char *dirpath, const char *devicetype,
- const char *name)
+
+/* Simplified asprintf: returns a kmalloc()ed formatted string, or NULL. */
+static char *kasprintf(const char *fmt, ...)
+{
+ va_list ap;
+ unsigned int len;
+ char *p, dummy[1];
+
+ va_start(ap, fmt);
+ /* FIXME: vsnprintf has a bug, NULL should work */
+ len = vsnprintf(dummy, 0, fmt, ap);
+ va_end(ap);
+ /* First pass only measured the length; now format into a sized buffer. */
+ p = kmalloc(len + 1, GFP_KERNEL);
+ if (!p)
+ return NULL;
+ va_start(ap, fmt);
+ vsprintf(p, fmt, ap);
+ va_end(ap);
+ return p;
+}
+
+static int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename)
{
int err;
struct xenbus_device *xendev;
unsigned int stringlen;
- /* Nodename: /device/<typename>/<name>/ */
- stringlen = strlen(dirpath) + strlen(devicetype) + strlen(name) + 3;
- /* Typename */
- stringlen += strlen(devicetype) + 1;
+ stringlen = strlen(nodename) + 1 + strlen(type) + 1;
xendev = kmalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
if (!xendev)
return -ENOMEM;
@@ -179,38 +324,103 @@
/* Copy the strings into the extra space. */
xendev->nodename = (char *)(xendev + 1);
- sprintf(xendev->nodename, "%s/%s/%s", dirpath, devicetype, name);
+ strcpy(xendev->nodename, nodename);
xendev->devicetype = xendev->nodename + strlen(xendev->nodename) + 1;
- strcpy(xendev->devicetype, devicetype);
-
- /* FIXME: look for "subtype" field. */
- snprintf(xendev->dev.bus_id, BUS_ID_SIZE, "%s-%s", devicetype, name);
- xendev->dev.bus = &xenbus_type;
+ strcpy(xendev->devicetype, type);
+
+ xendev->dev.parent = &bus->dev;
+ xendev->dev.bus = &bus->bus;
xendev->dev.release = xenbus_release_device;
+
+ err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
+ if (err) {
+ kfree(xendev);
+ return err;
+ }
/* Register with generic device framework. */
err = device_register(&xendev->dev);
if (err) {
- printk("XENBUS: Registering device %s: error %i\n",
- xendev->dev.bus_id, err);
+ printk("XENBUS: Registering %s device %s: error %i\n",
+ bus->bus.name, xendev->dev.bus_id, err);
kfree(xendev);
}
return err;
}
-static int xenbus_probe_device_type(const char *dirpath, const char *typename)
+/*
+ * Probe one frontend device: device/<typename>/<name>
+ *
+ * Builds "<frontend root>/<type>/<name>" and hands it to xenbus_probe_node().
+ * Returns -ENOMEM if the path cannot be allocated, otherwise whatever
+ * xenbus_probe_node() returns.
+ */
+static int xenbus_probe_frontend(const char *type, const char *name)
+{
+	int ret = -ENOMEM;
+	char *path = kasprintf("%s/%s/%s", xenbus_frontend.root, type, name);
+
+	if (path) {
+		ret = xenbus_probe_node(&xenbus_frontend, type, path);
+		kfree(path);
+	}
+	return ret;
+}
+
+/*
+ * Probe one backend device: backend/<typename>/<frontend-uuid>/<name>
+ *
+ * @dir is the already-built "backend/<typename>/<frontend-uuid>" path; @name
+ * is appended to it and the result is handed to xenbus_probe_node().
+ * Returns -ENOMEM if the path cannot be allocated, otherwise whatever
+ * xenbus_probe_node() returns.
+ */
+static int xenbus_probe_backend_unit(const char *dir,
+				     const char *type,
+				     const char *name)
+{
+	int ret = -ENOMEM;
+	char *path = kasprintf("%s/%s", dir, name);
+
+	if (path) {
+		ret = xenbus_probe_node(&xenbus_backend, type, path);
+		kfree(path);
+	}
+	return ret;
+}
+
+/*
+ * Probe all backend devices under backend/<typename>/<frontend-uuid>
+ *
+ * Lists the directory "<backend root>/<type>/<uuid>" and probes each entry
+ * with xenbus_probe_backend_unit(), stopping at the first failure.
+ * Returns 0 on success (including an empty directory), -ENOMEM if the path
+ * cannot be allocated, the xenbus_directory() error, or the first probe
+ * error.
+ */
+static int xenbus_probe_backend(const char *type, const char *uuid)
+{
+	char *path;
+	char **entries;
+	unsigned int idx, count = 0;
+	int err;
+
+	path = kasprintf("%s/%s/%s", xenbus_backend.root, type, uuid);
+	if (!path)
+		return -ENOMEM;
+
+	entries = xenbus_directory(path, "", &count);
+	if (IS_ERR(entries)) {
+		err = PTR_ERR(entries);
+		goto out_free_path;
+	}
+
+	err = 0;
+	for (idx = 0; idx < count && !err; idx++)
+		err = xenbus_probe_backend_unit(path, type, entries[idx]);
+
+	kfree(entries);
+out_free_path:
+	kfree(path);
+	return err;
+}
+
+static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
{
int err = 0;
char **dir;
unsigned int dir_n = 0;
int i;
- dir = xenbus_directory(dirpath, typename, &dir_n);
+ dir = xenbus_directory(bus->root, type, &dir_n);
if (IS_ERR(dir))
return PTR_ERR(dir);
for (i = 0; i < dir_n; i++) {
- err = xenbus_probe_device(dirpath, typename, dir[i]);
+ err = bus->probe(type, dir[i]);
if (err)
break;
}
@@ -218,18 +428,18 @@
return err;
}
-static int xenbus_probe_devices(const char *path)
+static int xenbus_probe_devices(struct xen_bus_type *bus)
{
int err = 0;
char **dir;
unsigned int i, dir_n;
- dir = xenbus_directory(path, "", &dir_n);
+ dir = xenbus_directory(bus->root, "", &dir_n);
if (IS_ERR(dir))
return PTR_ERR(dir);
for (i = 0; i < dir_n; i++) {
- err = xenbus_probe_device_type(path, dir[i]);
+ err = xenbus_probe_device_type(bus, dir[i]);
if (err)
break;
}
@@ -247,53 +457,154 @@
return ret;
}
-static void dev_changed(struct xenbus_watch *watch, const char *node)
-{
- char busid[BUS_ID_SIZE];
- int exists;
+/*
+ * Return the length of the prefix of @str that contains exactly @len
+ * occurrences of the separator @c.
+ *
+ * That is the index of the (@len + 1)-th occurrence of @c, or the full string
+ * length if @str holds exactly @len separators.  Returns -ERANGE if @str
+ * holds fewer than @len separators.
+ */
+static int strsep_len(const char *str, char c, unsigned int len)
+{
+	unsigned int pos = 0;
+
+	while (str[pos] != '\0') {
+		if (str[pos] == c) {
+			/* Already skipped the requested number of separators. */
+			if (len == 0)
+				return pos;
+			len--;
+		}
+		pos++;
+	}
+
+	if (len != 0)
+		return -ERANGE;
+	return pos;
+}
+
+/*
+ * Handle a xenstore watch event for @node on @bus.
+ *
+ * Nodes with fewer than two '/' separators are ignored.  If the node no
+ * longer exists, every device at or below that path is unregistered via
+ * xenbus_cleanup_devices().  Otherwise the device root path is derived by
+ * truncating @node after bus->levels separators, and the device is probed
+ * with xenbus_probe_node() unless xenbus_device_find() already knows it.
+ */
+static void dev_changed(const char *node, struct xen_bus_type *bus)
+{
+ int exists, rootlen;
struct xenbus_device *dev;
- char *p;
-
- /* Node is of form device/<type>/<identifier>[/...] */
- if (char_count(node, '/') != 2)
+ char type[BUS_ID_SIZE];
+ const char *p, *root;
+
+ if (char_count(node, '/') < 2)
+ return;
+
+ exists = xenbus_exists(node, "");
+ if (!exists) {
+ xenbus_cleanup_devices(node, &bus->bus);
return;
-
- /* Created or deleted? */
- exists = xenbus_exists(node, "");
-
+ }
+
+ /* backend/<type>/... or device/<type>/... */
p = strchr(node, '/') + 1;
- if (strlen(p) + 1 > BUS_ID_SIZE) {
- printk("Device for node %s is too big!\n", node);
+ /*
+ * "%.*s" consumes an int precision; strcspn() returns size_t, which
+ * is wider than int on 64-bit builds, so it must be cast explicitly.
+ */
+ snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
+ type[BUS_ID_SIZE-1] = '\0';
+
+ /*
+ * Cut node at its (bus->levels + 1)-th '/' (or keep it whole if it
+ * has exactly bus->levels of them); too few separators => ignore.
+ */
+ rootlen = strsep_len(node, '/', bus->levels);
+ if (rootlen < 0)
return;
- }
- /* Bus ID is name with / changed to - */
- strcpy(busid, p);
- *strchr(busid, '/') = '-';
-
- dev = xenbus_device_find(busid);
- printk("xenbus: device %s %s\n", busid, dev ? "exists" : "new");
- if (dev && !exists) {
- printk("xenbus: Unregistering device %s\n", busid);
- /* FIXME: free? */
- device_unregister(&dev->dev);
- } else if (!dev && exists) {
- printk("xenbus: Adding device %s\n", busid);
- /* Hack bus id back into two strings. */
- *strrchr(busid, '-') = '\0';
- xenbus_probe_device("device", busid, busid+strlen(busid)+1);
- } else
- printk("xenbus: strange, %s already %s\n", busid,
- exists ? "exists" : "gone");
- if (dev)
+ root = kasprintf("%.*s", rootlen, node);
+ if (!root)
+ return;
+
+ dev = xenbus_device_find(root, &bus->bus);
+ if (!dev)
+ xenbus_probe_node(bus, type, root);
+ else
put_device(&dev->dev);
+
+ kfree(root);
+}
+
+/* Watch callback for fe_watch: forwards the changed node to dev_changed() for the frontend bus. */
+static void frontend_changed(struct xenbus_watch *watch, const char *node)
+{
+ dev_changed(node, &xenbus_frontend);
+}
+
+/* Watch callback for be_watch: forwards the changed node to dev_changed() for the backend bus. */
+static void backend_changed(struct xenbus_watch *watch, const char *node)
+{
+ dev_changed(node, &xenbus_backend);
}
/* We watch for devices appearing and vanishing. */
-static struct xenbus_watch dev_watch = {
- /* FIXME: Ideally we'd only watch for changes 2 levels deep... */
+static struct xenbus_watch fe_watch = {
.node = "device",
- .callback = dev_changed,
+ .callback = frontend_changed,
};
+
+static struct xenbus_watch be_watch = {
+ .node = "backend",
+ .callback = backend_changed,
+};
+
+/*
+ * bus_for_each_dev() callback: invoke the bound driver's ->suspend hook.
+ *
+ * Devices without a driver, or whose driver has no ->suspend, are skipped.
+ * A failing hook is only logged; 0 is always returned so the bus walk
+ * continues over the remaining devices.  @data is unused.
+ */
+static int suspend_dev(struct device *dev, void *data)
+{
+	struct xenbus_driver *drv;
+	struct xenbus_device *xdev;
+	int err;
+
+	if (dev->driver == NULL)
+		return 0;
+
+	drv = to_xenbus_driver(dev->driver);
+	xdev = container_of(dev, struct xenbus_device, dev);
+	if (!drv->suspend)
+		return 0;
+
+	err = drv->suspend(xdev);
+	if (err)
+		printk("xenbus: suspend %s failed: %i\n", dev->bus_id, err);
+	return 0;
+}
+
+/*
+ * bus_for_each_dev() callback: invoke the bound driver's ->resume hook.
+ *
+ * Devices without a driver, or whose driver has no ->resume, are skipped.
+ * A failing hook is only logged; 0 is always returned so the bus walk
+ * continues over the remaining devices.  @data is unused.
+ */
+static int resume_dev(struct device *dev, void *data)
+{
+	struct xenbus_driver *drv;
+	struct xenbus_device *xdev;
+	int err;
+
+	if (dev->driver == NULL)
+		return 0;
+
+	drv = to_xenbus_driver(dev->driver);
+	xdev = container_of(dev, struct xenbus_device, dev);
+	if (!drv->resume)
+		return 0;
+
+	err = drv->resume(xdev);
+	if (err)
+		printk("xenbus: resume %s failed: %i\n", dev->bus_id, err);
+	return 0;
+}
+
+/*
+ * Suspend xenbus: take xenbus_lock, run suspend_dev over every device on the
+ * frontend and backend buses, then call xb_suspend_comms().
+ *
+ * The lock is deliberately NOT released here; it stays held until
+ * xenbus_resume() calls up(&xenbus_lock).
+ */
+void xenbus_suspend(void)
+{
+ /* Hold the lock across the suspend so no comms can happen as the page moves. */
+ down(&xenbus_lock);
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev);
+ xb_suspend_comms();
+}
+
+/*
+ * Resume xenbus: re-initialise comms with xb_init_comms(), re-register all
+ * watches, run resume_dev over every device on the frontend and backend
+ * buses, and finally release xenbus_lock.
+ *
+ * This function only releases the lock (up); it expects it to be held on
+ * entry, as left by xenbus_suspend().
+ */
+void xenbus_resume(void)
+{
+ xb_init_comms();
+ reregister_xenbus_watches();
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev);
+ up(&xenbus_lock);
+}
+
+/*
+ * Register @nb to be told when xenstore is up.
+ *
+ * Under xenbus_lock: if xen_start_info.store_evtchn is already non-zero the
+ * notifier is called immediately (event 0, NULL data) and its return value
+ * is returned; otherwise @nb is queued on xenstore_chain and 0 is returned.
+ */
+int register_xenstore_notifier(struct notifier_block *nb)
+{
+	int ret = 0;
+
+	down(&xenbus_lock);
+	if (!xen_start_info.store_evtchn)
+		notifier_chain_register(&xenstore_chain, nb);
+	else
+		ret = nb->notifier_call(nb, 0, NULL);
+	up(&xenbus_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(register_xenstore_notifier);
+
+/* Remove @nb from xenstore_chain, serialised against other users by xenbus_lock. */
+void unregister_xenstore_notifier(struct notifier_block *nb)
+{
+ down(&xenbus_lock);
+ notifier_chain_unregister(&xenstore_chain, nb);
+ up(&xenbus_lock);
+}
+EXPORT_SYMBOL(unregister_xenstore_notifier);
/* called from a thread in privcmd/privcmd.c */
int do_xenbus_probe(void *unused)
@@ -309,21 +620,25 @@
return err;
}
- /* Initialize non-xenbus drivers */
- balloon_init_watcher();
-
down(&xenbus_lock);
/* Enumerate devices in xenstore. */
- xenbus_probe_devices("device");
+ xenbus_probe_devices(&xenbus_frontend);
+ xenbus_probe_devices(&xenbus_backend);
/* Watch for changes. */
- register_xenbus_watch(&dev_watch);
+ register_xenbus_watch(&fe_watch);
+ register_xenbus_watch(&be_watch);
+ /* Notify others that xenstore is up */
+ notifier_call_chain(&xenstore_chain, 0, 0);
up(&xenbus_lock);
return 0;
}
static int __init xenbus_probe_init(void)
{
- bus_register(&xenbus_type);
+ bus_register(&xenbus_frontend.bus);
+ bus_register(&xenbus_backend.bus);
+ device_register(&xenbus_frontend.dev);
+ device_register(&xenbus_backend.dev);
if (!xen_start_info.store_evtchn)
return 0;
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Thu Aug 25
22:53:20 2005
@@ -30,7 +30,6 @@
#include <linux/errno.h>
#include <linux/types.h>
-#include "xenstore/xenstored.h"
#include <linux/uio.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -39,6 +38,7 @@
#include <linux/fcntl.h>
#include <linux/kthread.h>
#include <asm-xen/xenbus.h>
+#include "xenstored.h"
#include "xenbus_comms.h"
#define streq(a, b) (strcmp((a), (b)) == 0)
@@ -187,6 +187,7 @@
static char buffer[4096];
BUG_ON(down_trylock(&xenbus_lock) == 0);
+ /* XXX FIXME: might not be correct if name == "" */
BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer));
strcpy(buffer, dir);
@@ -399,9 +400,12 @@
ret = PTR_ERR(p);
break;
}
- if (sscanf(p, fmt, result) == 0)
- ret = -EINVAL;
- kfree(p);
+ if (fmt) {
+ if (sscanf(p, fmt, result) == 0)
+ ret = -EINVAL;
+ kfree(p);
+ } else
+ *(char **)result = p;
}
va_end(ap);
return ret;
@@ -494,6 +498,18 @@
printk(KERN_WARNING
"XENBUS Failed to release watch %s: %i\n",
watch->node, err);
+}
+
+/*
+ * Re-register callbacks to all watches.
+ *
+ * Walks the global watches list and issues xs_watch() again for each entry,
+ * using the watch's address printed in hex as the token.  A failure is
+ * logged and the walk continues with the remaining watches, instead of the
+ * error being silently dropped.
+ */
+void reregister_xenbus_watches(void)
+{
+	struct xenbus_watch *watch;
+	/* Two hex digits per pointer byte, plus the terminating NUL. */
+	char token[sizeof(watch) * 2 + 1];
+	int err;
+
+	list_for_each_entry(watch, &watches, list) {
+		sprintf(token, "%lX", (long)watch);
+		err = xs_watch(watch->node, token);
+		if (err)
+			printk(KERN_WARNING
+			       "XENBUS Failed to re-register watch %s: %i\n",
+			       watch->node, err);
+	}
}
static int watch_thread(void *unused)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-generic/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-generic/pgtable.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-generic/pgtable.h Thu Aug 25
22:53:20 2005
@@ -37,7 +37,7 @@
*/
#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
do { \
- set_pte_at((__vma)>vm_mm, (__address), __ptep, __entry); \
+ set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
flush_tlb_page(__vma, __address); \
} while (0)
#endif
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h Thu Aug 25
22:53:20 2005
@@ -93,7 +93,7 @@
static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
{
-#define C(i)
HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN
+ i]), ((u32 *)&t->tls_array[i])[0], ((u32 *)&t->tls_array[i])[1])
+#define C(i)
HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN
+ i]), *(u64 *)&t->tls_array[i])
C(0); C(1); C(2);
#undef C
}
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h Wed Aug
24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h Thu Aug
25 22:53:20 2005
@@ -1,11 +1,35 @@
#ifndef _ASM_I386_DMA_MAPPING_H
#define _ASM_I386_DMA_MAPPING_H
+/*
+ * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
+ * documentation.
+ */
+
+#include <linux/config.h>
#include <linux/mm.h>
-
#include <asm/cache.h>
#include <asm/io.h>
#include <asm/scatterlist.h>
+#include <asm-i386/swiotlb.h>
+
+/*
+ * Return non-zero if @addr has bits set outside the device's DMA mask.
+ * If @hwdev is NULL or has no dma_mask, a 32-bit mask is assumed.
+ */
+static inline int
+address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+{
+	/* If the device has a mask, use it, otherwise default to 32 bits */
+	dma_addr_t mask = (hwdev && hwdev->dma_mask) ?
+		*hwdev->dma_mask : 0xffffffff;
+
+	return (addr & ~mask) != 0;
+}
+
+/*
+ * Return 1 if the buffer [@p, @p + @size) extends past the end of the page
+ * containing @p and the bit for that page frame is clear in
+ * contiguous_bitmap; return 0 otherwise.
+ */
+static inline int
+range_straddles_page_boundary(void *p, size_t size)
+{
+	extern unsigned long *contiguous_bitmap;
+	unsigned long offset = (unsigned long)p & ~PAGE_MASK;
+
+	/* Fits entirely within one page: nothing to straddle. */
+	if (offset + size <= PAGE_SIZE)
+		return 0;
+
+	return !test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap);
+}
#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
@@ -24,46 +48,18 @@
dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction direction);
-static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- int i;
+extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction);
+extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction);
- BUG_ON(direction == DMA_NONE);
+extern dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction direction);
- for (i = 0; i < nents; i++ ) {
- BUG_ON(!sg[i].page);
-
- sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
- }
-
- flush_write_buffers();
- return nents;
-}
-
-static inline dma_addr_t
-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
- size_t size, enum dma_data_direction direction)
-{
- BUG_ON(direction == DMA_NONE);
- return page_to_phys(page) + offset;
-}
-
-static inline void
+extern void
dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
- enum dma_data_direction direction)
-{
- BUG_ON(direction == DMA_NONE);
-}
-
-
-static inline void
-dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
- enum dma_data_direction direction)
-{
- BUG_ON(direction == DMA_NONE);
-}
+ enum dma_data_direction direction);
extern void
dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
@@ -93,34 +89,25 @@
dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
enum dma_data_direction direction)
{
+ if (swiotlb)
+ swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction);
+ flush_write_buffers();
}
static inline void
dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
enum dma_data_direction direction)
{
+ if (swiotlb)
+ swiotlb_sync_sg_for_device(dev,sg,nelems,direction);
flush_write_buffers();
}
-static inline int
-dma_mapping_error(dma_addr_t dma_addr)
-{
- return 0;
-}
+extern int
+dma_mapping_error(dma_addr_t dma_addr);
-static inline int
-dma_supported(struct device *dev, u64 mask)
-{
- /*
- * we fall back to GFP_DMA when the mask isn't all 1s,
- * so we can't guarantee allocations that must be
- * within a tighter range than GFP_DMA..
- */
- if(mask < 0x00ffffff)
- return 0;
-
- return 1;
-}
+extern int
+dma_supported(struct device *dev, u64 mask);
static inline int
dma_set_mask(struct device *dev, u64 mask)
@@ -133,6 +120,7 @@
return 0;
}
+#ifdef __i386__
static inline int
dma_get_cache_alignment(void)
{
@@ -140,6 +128,9 @@
* maximum possible, to be safe */
return (1 << L1_CACHE_SHIFT_MAX);
}
+#else
+extern int dma_get_cache_alignment(void);
+#endif
#define dma_is_consistent(d) (1)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/fixmap.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/fixmap.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/fixmap.h Thu Aug 25
22:53:20 2005
@@ -102,8 +102,8 @@
__end_of_fixed_addresses
};
-extern void __set_fixmap (enum fixed_addresses idx,
- unsigned long phys, pgprot_t flags);
+extern void __set_fixmap(
+ enum fixed_addresses idx, maddr_t phys, pgprot_t flags);
#define set_fixmap(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Aug 25
22:53:20 2005
@@ -163,7 +163,7 @@
TRAP_INSTR
: "=a" (ret), "=b" (ign)
: "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -178,7 +178,7 @@
TRAP_INSTR
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -194,7 +194,7 @@
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -210,7 +210,7 @@
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -228,7 +228,7 @@
: "=a" (ret), "=b" (ign1), "=S" (ign2)
: "0" (__HYPERVISOR_sched_op),
"b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
- "S" (srec) : "memory");
+ "S" (srec) : "memory", "ecx");
return ret;
}
@@ -244,7 +244,7 @@
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -316,16 +316,17 @@
static inline int
HYPERVISOR_update_descriptor(
- unsigned long ma, unsigned long word1, unsigned long word2)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_update_descriptor), "1" (ma), "2" (word1),
- "3" (word2)
+ u64 ma, u64 desc)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_update_descriptor),
+ "1" ((unsigned long)ma), "2" ((unsigned long)(ma>>32)),
+ "3" ((unsigned long)desc), "4" ((unsigned long)(desc>>32))
: "memory" );
return ret;
@@ -385,13 +386,6 @@
#endif
"4" (flags)
: "memory" );
-
- if ( unlikely(ret < 0) )
- {
- printk(KERN_ALERT "Failed update VA mapping: %08lx, %08lx, %08lx\n",
- va, (new_val).pte_low, flags);
- BUG();
- }
return ret;
}
@@ -536,12 +530,15 @@
{
int ret;
unsigned long ign1;
+ /* Yes, I really do want to clobber edx here: when we resume a
+ vcpu after unpickling a multi-processor domain, it returns
+ here, but clobbers all of the call clobbered registers. */
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
- : "memory" );
+ : "memory", "ecx", "edx" );
return ret;
}
@@ -557,8 +554,26 @@
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
+ : "memory", "ecx" );
+
+ return ret;
+}
+
+/*
+ * Issue the sched_op hypercall with SCHEDOP_vcpu_pickle for @vcpu.
+ *
+ * The hypercall number goes in eax, the command word (SCHEDOP_vcpu_pickle
+ * combined with @vcpu shifted by SCHEDOP_vcpushift) in ebx, and @ctxt in ecx.
+ * Returns the value the hypervisor leaves in eax.  ign1/ign2 exist only so
+ * the compiler treats ebx/ecx as overwritten by the trap.
+ */
+static inline int
+HYPERVISOR_vcpu_pickle(
+ int vcpu, vcpu_guest_context_t *ctxt)
+{
+ int ret;
+ unsigned long ign1, ign2;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
+ "2" (ctxt)
: "memory" );
return ret;
}
+
#endif /* __HYPERCALL_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
Thu Aug 25 22:53:20 2005
@@ -124,17 +124,4 @@
#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)
#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE)
-#ifndef __ASSEMBLY__
-/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
-extern int bind_virq_to_irq(int virq);
-extern void unbind_virq_from_irq(int virq);
-extern int bind_ipi_to_irq(int ipi);
-extern void unbind_ipi_from_irq(int ipi);
-extern int bind_evtchn_to_irq(int evtchn);
-extern void unbind_evtchn_from_irq(int evtchn);
-
-extern void irq_suspend(void);
-extern void irq_resume(void);
-#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_IRQ_VECTORS_H */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
Thu Aug 25 22:53:20 2005
@@ -8,23 +8,12 @@
static char * __init machine_specific_memory_setup(void)
{
- char *who;
- unsigned long start_pfn, max_pfn;
-
- who = "Xen";
-
- /* In dom0, we have to start the fake e820 map above the first
- * 1MB, in other domains, it can start at 0. */
- if (xen_start_info.flags & SIF_INITDOMAIN)
- start_pfn = 0x100;
- else
- start_pfn = 0;
- max_pfn = xen_start_info.nr_pages;
+ unsigned long max_pfn = xen_start_info.nr_pages;
e820.nr_map = 0;
- add_memory_region(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn) -
PFN_PHYS(start_pfn), E820_RAM);
+ add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
- return who;
+ return "Xen";
}
void __init machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h Wed Aug
24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h Thu Aug
25 22:53:20 2005
@@ -34,10 +34,10 @@
* are always kernel segments while inside the kernel. Must
* happen before reload of cr3/ldt (i.e., not in __switch_to).
*/
- __asm__ __volatile__ ( "mov %%fs,%0 ; mov %%gs,%1"
+ asm volatile ( "mov %%fs,%0 ; mov %%gs,%1"
: "=m" (*(int *)&current->thread.fs),
"=m" (*(int *)&current->thread.gs));
- __asm__ __volatile__ ( "mov %0,%%fs ; mov %0,%%gs"
+ asm volatile ( "mov %0,%%fs ; mov %0,%%gs"
: : "r" (0) );
}
@@ -100,7 +100,7 @@
}
#define deactivate_mm(tsk, mm) \
- asm("mov %0,%%fs ; mov %0,%%gs": :"r" (0))
+ asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
#define activate_mm(prev, next) \
switch_mm((prev),(next),NULL)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Thu Aug 25
22:53:20 2005
@@ -60,18 +60,50 @@
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY (~0U)
+#define FOREIGN_FRAME(m) ((m) | 0x80000000U)
extern unsigned int *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)]))
-#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)]))
-static inline unsigned long phys_to_machine(unsigned long phys)
-{
- unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+#define pfn_to_mfn(pfn) \
+((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+/*
+ * Translate machine frame number @mfn to a pseudo-physical frame number by
+ * reading machine_to_phys_mapping[mfn].
+ *
+ * The load is done in inline asm with an __ex_table entry (fault at label 1
+ * resumes at label 2), so a faulting access is skipped rather than fatal; in
+ * that case the returned value is whatever happened to be in the output
+ * register, i.e. garbage.
+ */
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+ unsigned int pfn;
+
+ /*
+ * The array access can fail (e.g., device space beyond end of RAM).
+ * In such cases it doesn't matter what we return (we return garbage),
+ * but we must handle the fault without crashing!
+ */
+ asm (
+ "1: movl %1,%0\n"
+ "2:\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,2b\n"
+ ".previous"
+ : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+
+ return (unsigned long)pfn;
+}
+
+/* Definitions for machine and pseudophysical addresses. */
+#ifdef CONFIG_X86_PAE
+typedef unsigned long long paddr_t;
+typedef unsigned long long maddr_t;
+#else
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+#endif
+
+static inline maddr_t phys_to_machine(paddr_t phys)
+{
+ maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
return machine;
}
-static inline unsigned long machine_to_phys(unsigned long machine)
-{
- unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+static inline paddr_t machine_to_phys(maddr_t machine)
+{
+ paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
return phys;
}
@@ -86,8 +118,9 @@
typedef struct { unsigned long long pmd; } pmd_t;
typedef struct { unsigned long long pgd; } pgd_t;
typedef struct { unsigned long long pgprot; } pgprot_t;
-#define __pte(x) ({ unsigned long long _x = (x); \
- (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pte(x) ({ unsigned long long _x = (x); \
+ if (_x & 1) _x = phys_to_machine(_x); \
+ ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
#define __pgd(x) ({ unsigned long long _x = (x); \
(((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
#define __pmd(x) ({ unsigned long long _x = (x); \
@@ -227,8 +260,10 @@
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
/* VIRT <-> MACHINE conversion */
-#define virt_to_machine(_a) (phys_to_machine(__pa(_a)))
-#define machine_to_virt(_m) (__va(machine_to_phys(_m)))
+#define virt_to_machine(v) (phys_to_machine(__pa(v)))
+#define machine_to_virt(m) (__va(machine_to_phys(m)))
+#define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
+#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
#endif /* __KERNEL__ */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h Thu Aug 25
22:53:20 2005
@@ -43,11 +43,8 @@
struct pci_dev;
-/* The PCI address space does equal the physical memory
- * address space. The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (1)
+/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */
+#define PCI_DMA_BUS_IS_PHYS (0)
/* pci_unmap_{page,single} is a nop so... */
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h Thu Aug 25
22:53:20 2005
@@ -14,9 +14,9 @@
do { \
if (unlikely((mm)->context.pinned)) { \
if (!PageHighMem(pte)) \
- HYPERVISOR_update_va_mapping( \
+ BUG_ON(HYPERVISOR_update_va_mapping( \
(unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT),\
- pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0);\
+ pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));\
set_pmd(pmd, __pmd(_PAGE_TABLE + \
((unsigned long long)page_to_pfn(pte) << \
(unsigned long long) PAGE_SHIFT))); \
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Wed Aug
24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Thu Aug
25 22:53:20 2005
@@ -14,7 +14,28 @@
* hook is made available.
*/
#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+/*
+ * Install @val at @ptep for virtual address @addr in @mm.
+ *
+ * For the current mm or init_mm the update is first attempted with the
+ * HYPERVISOR_update_va_mapping() hypercall; if that returns non-zero, or if
+ * @mm is some other address space, the entry is written with set_pte().
+ */
+inline static void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t val )
+{
+	int other_mm = (mm != current->mm) && (mm != &init_mm);
+
+	/* Hypercall path succeeded: nothing more to do. */
+	if ( !other_mm && !HYPERVISOR_update_va_mapping( (addr), (val), 0 ) )
+		return;
+
+	set_pte(ptep, val);
+}
+
+/*
+ * Like set_pte_at(), but also invalidates the TLB entry for @addr.
+ *
+ * For the current mm or init_mm the update is attempted with
+ * HYPERVISOR_update_va_mapping() and UVMF_INVLPG; if that returns non-zero,
+ * or if @mm is some other address space, the entry is written with set_pte()
+ * followed by an explicit xen_invlpg().
+ */
+inline static void set_pte_at_sync(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t val )
+{
+	int other_mm = (mm != current->mm) && (mm != &init_mm);
+
+	/* Hypercall path succeeded: nothing more to do. */
+	if ( !other_mm &&
+	     !HYPERVISOR_update_va_mapping( (addr), (val), UVMF_INVLPG ) )
+		return;
+
+	set_pte(ptep, val);
+	xen_invlpg(addr);
+}
+
#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
#ifndef CONFIG_XEN_SHADOW_MODE
@@ -42,17 +63,15 @@
*
* NB2. When deliberately mapping foreign pages into the p2m table, you *must*
* use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
- * require. In all the cases we care about, the high bit gets shifted out
- * (e.g., phys_to_machine()) so behaviour there is correct.
+ * require. In all the cases we care about, the FOREIGN_FRAME bit is
+ * masked (e.g., pfn_to_mfn()) so behaviour there is correct.
*/
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
#define pte_pfn(_pte) \
({ \
unsigned long mfn = pte_mfn(_pte); \
unsigned long pfn = mfn_to_pfn(mfn); \
- if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
+ if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Wed Aug
24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Thu Aug
25 22:53:20 2005
@@ -68,7 +68,27 @@
xen_l1_entry_update((pteptr), (pteval))
# define set_pte_atomic(pteptr,pteval) set_pte(pteptr,pteval)
#endif
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+/*
+ * Install @val at @ptep for virtual address @addr in @mm.
+ *
+ * For the current mm or init_mm the update is first attempted with the
+ * HYPERVISOR_update_va_mapping() hypercall; if that returns non-zero, or if
+ * @mm is some other address space, the entry is written with set_pte().
+ */
+inline static void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t val )
+{
+	int other_mm = (mm != current->mm) && (mm != &init_mm);
+
+	/* Hypercall path succeeded: nothing more to do. */
+	if ( !other_mm && !HYPERVISOR_update_va_mapping( (addr), (val), 0 ) )
+		return;
+
+	set_pte(ptep, val);
+}
+
+/*
+ * Like set_pte_at(), but also invalidates the TLB entry for @addr.
+ *
+ * For the current mm or init_mm the update is attempted with
+ * HYPERVISOR_update_va_mapping() and UVMF_INVLPG; if that returns non-zero,
+ * or if @mm is some other address space, the entry is written with set_pte()
+ * followed by an explicit xen_invlpg().
+ */
+inline static void set_pte_at_sync(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t val )
+{
+	int other_mm = (mm != current->mm) && (mm != &init_mm);
+
+	/* Hypercall path succeeded: nothing more to do. */
+	if ( !other_mm &&
+	     !HYPERVISOR_update_va_mapping( (addr), (val), UVMF_INVLPG ) )
+		return;
+
+	set_pte(ptep, val);
+	xen_invlpg(addr);
+}
#ifdef CONFIG_XEN_SHADOW_MODE
# define set_pmd(pmdptr,pmdval) \
@@ -130,14 +150,13 @@
return !pte.pte_low && !pte.pte_high;
}
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
-#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) /* FIXME */
+#define pte_mfn(_pte) ( ((_pte).pte_low >> PAGE_SHIFT) |\
+ (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)) )
#define pte_pfn(_pte) \
({ \
unsigned long mfn = pte_mfn(_pte); \
unsigned long pfn = mfn_to_pfn(mfn); \
- if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
+ if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Thu Aug 25
22:53:20 2005
@@ -32,7 +32,7 @@
*/
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
extern unsigned long empty_zero_page[1024];
-extern pgd_t swapper_pg_dir[1024];
+extern pgd_t *swapper_pg_dir;
extern kmem_cache_t *pgd_cache;
extern kmem_cache_t *pmd_cache;
extern spinlock_t pgd_lock;
@@ -398,7 +398,7 @@
do { \
if (__dirty) { \
if ( likely((__vma)->vm_mm == current->mm) ) { \
- HYPERVISOR_update_va_mapping((__address),
(__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned
long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
+ BUG_ON(HYPERVISOR_update_va_mapping((__address),
(__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned
long)((__vma)->vm_mm->cpu_vm_mask.bits))); \
} else { \
xen_l1_entry_update((__ptep), (__entry)); \
flush_tlb_page((__vma), (__address)); \
@@ -416,8 +416,8 @@
#define ptep_establish_new(__vma, __address, __ptep, __entry) \
do { \
if (likely((__vma)->vm_mm == current->mm)) { \
- HYPERVISOR_update_va_mapping((__address), \
- __entry, 0); \
+ BUG_ON(HYPERVISOR_update_va_mapping((__address), \
+ __entry, 0)); \
} else { \
xen_l1_entry_update((__ptep), (__entry)); \
} \
@@ -450,7 +450,7 @@
#define arbitrary_virt_to_machine(__va)
\
({ \
pte_t *__pte = virt_to_ptep(__va); \
- unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK; \
+ maddr_t __pa = (maddr_t)pte_mfn(*__pte) << PAGE_SHIFT; \
__pa | ((unsigned long)(__va) & (PAGE_SIZE-1)); \
})
@@ -466,10 +466,12 @@
unsigned long size,
pgprot_t prot,
domid_t domid);
-int __direct_remap_area_pages(struct mm_struct *mm,
- unsigned long address,
- unsigned long size,
- mmu_update_t *v);
+int create_lookup_pte_addr(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long *ptep);
+int touch_pte_range(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long size);
#define io_remap_page_range(vma,from,phys,size,prot) \
direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h Wed Aug
24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h Thu Aug
25 22:53:20 2005
@@ -1,89 +1,1 @@
-#ifndef _X8664_DMA_MAPPING_H
-#define _X8664_DMA_MAPPING_H 1
-
-/*
- * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
- * documentation.
- */
-
-#include <linux/config.h>
-
-#include <asm/scatterlist.h>
-#include <asm/io.h>
-#include <asm/swiotlb.h>
-
-extern dma_addr_t bad_dma_address;
-#define dma_mapping_error(x) \
- (swiotlb ? swiotlb_dma_mapping_error(x) : ((x) == bad_dma_address))
-
-void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t
*dma_handle,
- unsigned gfp);
-void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle);
-
-extern dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size,
- enum dma_data_direction direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
- enum dma_data_direction direction);
-
-#define dma_map_page(dev,page,offset,size,dir) \
- dma_map_single((dev), page_address(page)+(offset), (size), (dir))
-
-extern void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction);
-
-extern void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t
size,
- enum dma_data_direction direction);
-
-static inline void dma_sync_sg_for_cpu(struct device *hwdev,
- struct scatterlist *sg,
- int nelems, int direction)
-{
- if (direction == DMA_NONE)
- out_of_line_bug();
-
- if (swiotlb)
- return swiotlb_sync_sg_for_cpu(hwdev,sg,nelems,direction);
-
- flush_write_buffers();
-}
-
-static inline void dma_sync_sg_for_device(struct device *hwdev,
- struct scatterlist *sg,
- int nelems, int direction)
-{
- if (direction == DMA_NONE)
- out_of_line_bug();
-
- if (swiotlb)
- return swiotlb_sync_sg_for_device(hwdev,sg,nelems,direction);
-
- flush_write_buffers();
-}
-
-extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
-extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
-
-#define dma_unmap_page dma_unmap_single
-
-extern int dma_supported(struct device *hwdev, u64 mask);
-extern int dma_get_cache_alignment(void);
-#define dma_is_consistent(h) 1
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
- *dev->dma_mask = mask;
- return 0;
-}
-
-static inline void dma_cache_sync(void *vaddr, size_t size, enum
dma_data_direction dir)
-{
- flush_write_buffers();
-}
-#endif
+#include <asm-i386/dma-mapping.h>
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Wed Aug
24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Thu Aug
25 22:53:20 2005
@@ -502,4 +502,21 @@
return ret;
}
+static inline int
+HYPERVISOR_vcpu_pickle(
+ int vcpu, vcpu_guest_context_t *ctxt)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_sched_op),
+ "D" ((unsigned long)SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
+ "S" ((unsigned long)ctxt)
+ : __syscall_clobber );
+
+ return ret;
+}
+
#endif /* __HYPERCALL_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h
Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h
Thu Aug 25 22:53:20 2005
@@ -122,17 +122,4 @@
#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)
#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE)
-#ifndef __ASSEMBLY__
-/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
-extern int bind_virq_to_irq(int virq);
-extern void unbind_virq_from_irq(int virq);
-extern int bind_ipi_to_irq(int ipi);
-extern void unbind_ipi_from_irq(int ipi);
-extern int bind_evtchn_to_irq(int evtchn);
-extern void unbind_evtchn_from_irq(int evtchn);
-
-extern void irq_suspend(void);
-extern void irq_resume(void);
-#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_IRQ_VECTORS_H */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Thu Aug 25
22:53:20 2005
@@ -62,19 +62,46 @@
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY (~0U)
+#define FOREIGN_FRAME(m) ((m) | 0x80000000U)
extern u32 *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) ((unsigned long) phys_to_machine_mapping[(unsigned
int)(_pfn)])
-#define mfn_to_pfn(_mfn) ((unsigned long) machine_to_phys_mapping[(unsigned
int)(_mfn)])
-static inline unsigned long phys_to_machine(unsigned long phys)
-{
- unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+#define pfn_to_mfn(pfn) \
+((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+ unsigned int pfn;
+
+ /*
+ * The array access can fail (e.g., device space beyond end of RAM).
+ * In such cases it doesn't matter what we return (we return garbage),
+ * but we must handle the fault without crashing!
+ */
+ asm (
+ "1: movl %1,%k0\n"
+ "2:\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 8\n"
+ " .quad 1b,2b\n"
+ ".previous"
+ : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+
+ return (unsigned long)pfn;
+}
+
+/* Definitions for machine and pseudophysical addresses. */
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+
+static inline maddr_t phys_to_machine(paddr_t phys)
+{
+ maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
return machine;
}
-static inline unsigned long machine_to_phys(unsigned long machine)
-{
- unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+static inline paddr_t machine_to_phys(maddr_t machine)
+{
+ paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
return phys;
}
@@ -211,8 +238,10 @@
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
/* VIRT <-> MACHINE conversion */
-#define virt_to_machine(_a) (phys_to_machine(__pa(_a)))
-#define machine_to_virt(_m) (__va(machine_to_phys(_m)))
+#define virt_to_machine(v) (phys_to_machine(__pa(v)))
+#define machine_to_virt(m) (__va(machine_to_phys(m)))
+#define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
+#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
#define VM_DATA_DEFAULT_FLAGS \
(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h Thu Aug 25
22:53:20 2005
@@ -79,7 +79,9 @@
#else
/* No IOMMU */
-#define PCI_DMA_BUS_IS_PHYS 1
+/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */
+#define PCI_DMA_BUS_IS_PHYS (0)
+
#define pci_dac_dma_supported(pci_dev, mask) 1
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Aug 24
02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Thu Aug 25
22:53:20 2005
@@ -4,31 +4,20 @@
/*
* This file contains the functions and defines necessary to modify and use
* the x86-64 page table tree.
- *
- * x86-64 has a 4 level table setup. Generic linux MM only supports
- * three levels. The fourth level is currently a single static page that
- * is shared by everybody and just contains a pointer to the current
- * three level page setup on the beginning and some kernel mappings at
- * the end. For more details see Documentation/x86_64/mm.txt
*/
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <asm/bitops.h>
#include <linux/threads.h>
+#include <linux/sched.h>
#include <asm/pda.h>
+#ifdef CONFIG_XEN
#include <asm-xen/hypervisor.h>
+
extern pud_t level3_user_pgt[512];
-extern pud_t init_level4_pgt[];
extern pud_t init_level4_user_pgt[];
-extern unsigned long __supported_pte_mask;
-
-#define swapper_pg_dir NULL
-
-extern int nonx_setup(char *str);
-extern void paging_init(void);
-extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
-
-extern unsigned long pgkern_mask;
+
+extern void xen_init_pt(void);
#define virt_to_ptep(__va) \
({ \
@@ -44,6 +33,22 @@
unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK; \
__pa | ((unsigned long)(__va) & (PAGE_SIZE-1)); \
})
+#endif
+
+extern pud_t level3_kernel_pgt[512];
+extern pud_t level3_physmem_pgt[512];
+extern pud_t level3_ident_pgt[512];
+extern pmd_t level2_kernel_pgt[512];
+extern pgd_t init_level4_pgt[];
+extern unsigned long __supported_pte_mask;
+
+#define swapper_pg_dir init_level4_pgt
+
+extern int nonx_setup(char *str);
+extern void paging_init(void);
+extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
+
+extern unsigned long pgkern_mask;
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -52,11 +57,14 @@
extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
#define PGDIR_SHIFT 39
#define PTRS_PER_PGD 512
/*
- * PUDIR_SHIFT determines what a top-level page table entry can map
+ * 3rd level page
*/
#define PUD_SHIFT 30
#define PTRS_PER_PUD 512
@@ -80,7 +88,7 @@
#define pud_ERROR(e) \
printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e),
pud_val(e))
#define pgd_ERROR(e) \
- printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e),
pgd_val(e))
+ printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e),
pgd_val(e))
#define pgd_none(x) (!pgd_val(x))
#define pud_none(x) (!pud_val(x))
@@ -90,18 +98,10 @@
extern inline int pud_present(pud_t pud) { return !pud_none(pud); }
-#ifdef CONFIG_SMP
-#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval))
-
-#else
-#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval))
-#if 0
static inline void set_pte(pte_t *dst, pte_t val)
{
*dst = val;
}
-#endif
-#endif
#define set_pmd(pmdptr, pmdval) xen_l2_entry_update(pmdptr, (pmdval))
#define set_pud(pudptr, pudval) xen_l3_entry_update(pudptr, (pudval))
@@ -132,6 +132,9 @@
* each domain will have separate page tables, with their own versions of
* accessed & dirty state.
*/
+#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte, 0))
+
+#if 0
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long
addr, pte_t *xp)
{
pte_t pte = *xp;
@@ -139,21 +142,22 @@
set_pte(xp, __pte_ma(0));
return pte;
}
+#endif
#define pte_same(a, b) ((a).pte == (b).pte)
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-#define PUD_SIZE (1UL << PUD_SHIFT)
-#define PUD_MASK (~(PUD_SIZE-1))
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
#define FIRST_USER_ADDRESS 0
#ifndef __ASSEMBLY__
-#define MAXMEM 0x3fffffffffffUL
+#define MAXMEM 0x3fffffffffffUL
#define VMALLOC_START 0xffffc20000000000UL
#define VMALLOC_END 0xffffe1ffffffffffUL
#define MODULES_VADDR 0xffffffff88000000UL
@@ -262,7 +266,16 @@
val &= ~(_PAGE_USER | _PAGE_DIRTY);
return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
}
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+inline static void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t val )
+{
+ if ( ((mm != current->mm) && (mm != &init_mm)) ||
+ HYPERVISOR_update_va_mapping( (addr), (val), 0 ) )
+ {
+ set_pte(ptep, val);
+ }
+}
#define pte_none(x) (!(x).pte)
#define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
@@ -287,17 +300,15 @@
*
* NB2. When deliberately mapping foreign pages into the p2m table, you *must*
* use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
- * require. In all the cases we care about, the high bit gets shifted out
- * (e.g., phys_to_machine()) so behaviour there is correct.
- */
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
+ * require. In all the cases we care about, the FOREIGN_FRAME bit is
+ * masked (e.g., pfn_to_mfn()) so behaviour there is correct.
+ */
#define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
#define pte_pfn(_pte) \
({ \
unsigned long mfn = pte_mfn(_pte); \
unsigned pfn = mfn_to_pfn(mfn); \
- if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
+ if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
@@ -347,7 +358,7 @@
pte_t pte = *ptep;
int ret = pte_dirty(pte);
if (ret)
- xen_l1_entry_update(ptep, pte_mkclean(pte));
+ set_pte(ptep, pte_mkclean(pte));
return ret;
}
@@ -356,7 +367,7 @@
pte_t pte = *ptep;
int ret = pte_young(pte);
if (ret)
- xen_l1_entry_update(ptep, pte_mkold(pte));
+ set_pte(ptep, pte_mkold(pte));
return ret;
}
@@ -398,7 +409,7 @@
/* PUD - Level3 access */
/* to find an entry in a page-table-directory. */
-#define pud_index(address) ((address >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) +
pud_index(address))
static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
{
@@ -413,7 +424,7 @@
{
unsigned long addr;
- addr = pud_val(init_level4_pgt[pud_index(address)]);
+ addr = pgd_val(init_level4_pgt[pud_index(address)]);
addr &= PHYSICAL_PAGE_MASK; /* machine physical */
addr = machine_to_phys(addr);
return __pud_offset_k((pud_t *)__va(addr), address);
@@ -427,9 +438,11 @@
#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
pmd_index(address))
#define pmd_none(x) (!pmd_val(x))
-#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+/* pmd_present doesn't just test the _PAGE_PRESENT bit, since writable
+ page tables (wr.p.t.) can temporarily clear it. */
+#define pmd_present(x) (pmd_val(x))
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
-#define pmd_bad(x) ((pmd_val(x) & ~PTE_MASK) != _KERNPG_TABLE )
+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT))
!= (_KERNPG_TABLE & ~_PAGE_PRESENT))
#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
#define pmd_pfn(x) ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
@@ -479,11 +492,24 @@
* race with other CPU's that might be updating the dirty
* bit at the same time. */
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#if 0
#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
do { \
if (__dirty) { \
set_pte(__ptep, __entry); \
flush_tlb_page(__vma, __address); \
+ } \
+ } while (0)
+#endif
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+ do { \
+ if (__dirty) { \
+ if ( likely((__vma)->vm_mm == current->mm) ) { \
+ BUG_ON(HYPERVISOR_update_va_mapping((__address),
(__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned
long)((__vma)->vm_mm->cpu_vm_mask.bits))); \
+ } else { \
+ xen_l1_entry_update((__ptep), (__entry)); \
+ flush_tlb_page((__vma), (__address)); \
+ } \
} \
} while (0)
@@ -510,12 +536,18 @@
unsigned long address,
unsigned long size,
mmu_update_t *v);
+int create_lookup_pte_addr(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long *ptep);
+int touch_pte_range(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long size);
#define io_remap_page_range(vma, vaddr, paddr, size, prot) \
- remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
+
direct_remap_area_pages((vma)->vm_mm,vaddr,paddr,size,prot,DOMID_IO)
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
+
direct_remap_area_pages((vma)->vm_mm,vaddr,(pfn)<<PAGE_SHIFT,size,prot,DOMID_IO)
#define MK_IOSPACE_PFN(space, pfn) (pfn)
#define GET_IOSPACE(pfn) 0
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h Wed Aug
24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h Thu Aug
25 22:53:20 2005
@@ -153,6 +153,20 @@
} while (0);
}
}
+
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+#ifndef CONFIG_XEN
+ mmu_cr4_features &= ~mask;
+ __asm__("movq %%cr4,%%rax\n\t"
+ "andq %0,%%rax\n\t"
+ "movq %%rax,%%cr4\n"
+ : : "irg" (~mask)
+ :"ax");
+#endif
+}
+
#define load_cr3(pgdir) do { \
xen_pt_switch(__pa(pgdir)); \
@@ -283,9 +297,9 @@
load_gs_index(0);
\
(regs)->rip = (new_rip);
\
(regs)->rsp = (new_rsp);
\
- write_pda(oldrsp, (new_rsp));
\
- (regs)->cs = __USER_CS;
\
- (regs)->ss = __USER_DS;
\
+ write_pda(oldrsp, (new_rsp));
\
+ (regs)->cs = __USER_CS;
\
+ (regs)->ss = __USER_DS;
\
(regs)->eflags = 0x200;
\
set_fs(USER_DS);
\
} while(0)
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/balloon.h
--- a/linux-2.6-xen-sparse/include/asm-xen/balloon.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/balloon.h Thu Aug 25 22:53:20 2005
@@ -35,10 +35,19 @@
* Inform the balloon driver that it should allow some slop for device-driver
* memory activities.
*/
-extern void balloon_update_driver_allowance(long delta);
+extern void
+balloon_update_driver_allowance(
+ long delta);
-/* Give up unmapped pages to the balloon driver. */
-extern void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns);
+/* Allocate an empty low-memory page range. */
+extern struct page *
+balloon_alloc_empty_page_range(
+ unsigned long nr_pages);
+
+/* Deallocate an empty page range, adding to the balloon. */
+extern void
+balloon_dealloc_empty_page_range(
+ struct page *page, unsigned long nr_pages);
/*
* Prevent the balloon driver from changing the memory reservation during
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/evtchn.h
--- a/linux-2.6-xen-sparse/include/asm-xen/evtchn.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/evtchn.h Thu Aug 25 22:53:20 2005
@@ -32,6 +32,7 @@
#define __ASM_EVTCHN_H__
#include <linux/config.h>
+#include <linux/interrupt.h>
#include <asm-xen/hypervisor.h>
#include <asm/ptrace.h>
#include <asm-xen/synch_bitops.h>
@@ -41,6 +42,34 @@
/*
* LOW-LEVEL DEFINITIONS
*/
+
+/* Dynamically bind a VIRQ source to Linux IRQ space. */
+extern int bind_virq_to_irq(int virq);
+extern void unbind_virq_from_irq(int virq);
+
+/* Dynamically bind an IPI source to Linux IRQ space. */
+extern int bind_ipi_to_irq(int ipi);
+extern void unbind_ipi_from_irq(int ipi);
+
+/* Dynamically bind an event-channel port to Linux IRQ space. */
+extern int bind_evtchn_to_irq(unsigned int evtchn);
+extern void unbind_evtchn_from_irq(unsigned int evtchn);
+
+/*
+ * Dynamically bind an event-channel port to an IRQ-like callback handler.
+ * On some platforms this may not be implemented via the Linux IRQ subsystem.
+ * You *cannot* trust the irq argument passed to the callback handler.
+ */
+extern int bind_evtchn_to_irqhandler(
+ unsigned int evtchn,
+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
+extern void unbind_evtchn_from_irqhandler(unsigned int evtchn, void *dev_id);
+
+extern void irq_suspend(void);
+extern void irq_resume(void);
/* Entry point for notifications into Linux subsystems. */
asmlinkage void evtchn_do_upcall(struct pt_regs *regs);
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Thu Aug 25 22:53:20 2005
@@ -19,54 +19,48 @@
/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
#define NR_GRANT_FRAMES 4
-#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
-int
-gnttab_grant_foreign_access(
- domid_t domid, unsigned long frame, int readonly);
+struct gnttab_free_callback {
+ struct gnttab_free_callback *next;
+ void (*fn)(void *);
+ void *arg;
+ u16 count;
+};
-void
-gnttab_end_foreign_access(
- grant_ref_t ref, int readonly);
+int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
+ int readonly);
-int
-gnttab_grant_foreign_transfer(
- domid_t domid, unsigned long pfn);
+void gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);
+void gnttab_end_foreign_access(grant_ref_t ref, int readonly);
-unsigned long
-gnttab_end_foreign_transfer(
- grant_ref_t ref);
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
-int
-gnttab_query_foreign_access(
- grant_ref_t ref );
+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
+unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
+
+int gnttab_query_foreign_access(grant_ref_t ref);
/*
* operations on reserved batches of grant references
*/
-int
-gnttab_alloc_grant_references(
- u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal );
+int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
-void
-gnttab_free_grant_references(
- u16 count, grant_ref_t private_head );
+void gnttab_free_grant_reference(grant_ref_t ref);
-int
-gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal
-);
+void gnttab_free_grant_references(grant_ref_t head);
-void
-gnttab_release_grant_reference(
- grant_ref_t *private_head, grant_ref_t release );
+int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
-void
-gnttab_grant_foreign_access_ref(
- grant_ref_t ref, domid_t domid, unsigned long frame, int readonly);
+void gnttab_release_grant_reference(grant_ref_t *private_head,
+ grant_ref_t release);
-void
-gnttab_grant_foreign_transfer_ref(
- grant_ref_t, domid_t domid, unsigned long pfn);
+void gnttab_request_free_callback(struct gnttab_free_callback *callback,
+ void (*fn)(void *), void *arg, u16 count);
+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int readonly);
+
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
+ unsigned long pfn);
#endif /* __ASM_GNTTAB_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Thu Aug 25 22:53:20 2005
@@ -134,12 +134,8 @@
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
#endif /* linux < 2.6.0 */
-void xen_contig_memory(unsigned long vstart, unsigned int order);
-
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
-/* Allocate a contiguous empty region of low memory. Return virtual start. */
-unsigned long allocate_empty_lowmem_region(unsigned long pages);
-#endif
+void xen_create_contiguous_region(unsigned long vstart, unsigned int order);
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
#include <asm/hypercall.h>
diff -r 5f1ed597f107 -r 8799d14bef77
linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Thu Aug 25 22:53:20 2005
@@ -1,5 +1,3 @@
-#ifndef _ASM_XEN_XENBUS_H
-#define _ASM_XEN_XENBUS_H
/******************************************************************************
* xenbus.h
*
@@ -28,13 +26,17 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
+
+#ifndef _ASM_XEN_XENBUS_H
+#define _ASM_XEN_XENBUS_H
+
#include <linux/device.h>
+#include <linux/notifier.h>
#include <asm/semaphore.h>
/* A xenbus device. */
struct xenbus_device {
char *devicetype;
- char *subtype;
char *nodename;
struct device dev;
int has_error;
@@ -50,7 +52,6 @@
{
/* .../device/<device_type>/<identifier> */
char devicetype[32]; /* General class of device. */
- char subtype[32]; /* Contents of "subtype" for this device */
};
/* A xenbus driver. */
@@ -58,9 +59,11 @@
char *name;
struct module *owner;
const struct xenbus_device_id *ids;
- int (*probe) (struct xenbus_device * dev,
- const struct xenbus_device_id * id);
- int (*remove) (struct xenbus_device * dev);
+ int (*probe)(struct xenbus_device *dev,
+ const struct xenbus_device_id *id);
+ int (*remove)(struct xenbus_device *dev);
+ int (*suspend)(struct xenbus_device *dev);
+ int (*resume)(struct xenbus_device *dev);
struct device_driver driver;
};
@@ -69,7 +72,8 @@
return container_of(drv, struct xenbus_driver, driver);
}
-int xenbus_register_driver(struct xenbus_driver *drv);
+int xenbus_register_device(struct xenbus_driver *drv);
+int xenbus_register_backend(struct xenbus_driver *drv);
void xenbus_unregister_driver(struct xenbus_driver *drv);
/* Caller must hold this lock to call these functions: it's also held
@@ -112,7 +116,26 @@
void (*callback)(struct xenbus_watch *, const char *node);
};
+/* notifier routines for when the xenstore comes up */
+int register_xenstore_notifier(struct notifier_block *nb);
+void unregister_xenstore_notifier(struct notifier_block *nb);
+
int register_xenbus_watch(struct xenbus_watch *watch);
void unregister_xenbus_watch(struct xenbus_watch *watch);
+void reregister_xenbus_watches(void);
+
+/* Called from xen core code. */
+void xenbus_suspend(void);
+void xenbus_resume(void);
+
+#define XENBUS_IS_ERR_READ(str) ({ \
+ if (!IS_ERR(str) && strlen(str) == 0) { \
+ kfree(str); \
+ str = ERR_PTR(-ERANGE); \
+ } \
+ IS_ERR(str); \
+})
+
+#define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE)
#endif /* _ASM_XEN_XENBUS_H */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/linux/mm.h
--- a/linux-2.6-xen-sparse/include/linux/mm.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/linux/mm.h Thu Aug 25 22:53:20 2005
@@ -817,6 +817,12 @@
int remap_pfn_range(struct vm_area_struct *, unsigned long,
unsigned long, unsigned long, pgprot_t);
+typedef int (*pte_fn_t)(pte_t *pte, struct page *pte_page, unsigned long addr,
+ void *data);
+extern int generic_page_range(struct mm_struct *mm, unsigned long address,
+ unsigned long size, pte_fn_t fn, void *data);
+
+
#ifdef CONFIG_PROC_FS
void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
#else
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/mkbuildtree
--- a/linux-2.6-xen-sparse/mkbuildtree Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/mkbuildtree Thu Aug 25 22:53:20 2005
@@ -102,10 +102,11 @@
relative_lndir ${RS}
rm -f mkbuildtree
-
# Create links to the shared definitions of the Xen interfaces.
rm -rf ${AD}/include/asm-xen/xen-public
mkdir ${AD}/include/asm-xen/xen-public
cd ${AD}/include/asm-xen/xen-public
relative_lndir ../../../${RS}/../xen/include/public
+cd ${AD}/drivers/xen/xenbus
+ln -sf ../../../${RS}/../tools/xenstore/xenstored.h
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/mm/memory.c Thu Aug 25 22:53:20 2005
@@ -954,8 +954,10 @@
i++;
start += PAGE_SIZE;
len--;
+printk(KERN_ALERT "HIT 0x%lx\n", start);
continue;
- }
+ }
+else printk(KERN_ALERT "MISS 0x%lx\n", start);
}
if (!vma || (vma->vm_flags & VM_IO)
@@ -1213,6 +1215,104 @@
}
EXPORT_SYMBOL(remap_pfn_range);
+static inline int generic_pte_range(struct mm_struct *mm,
+ pmd_t *pmd,
+ unsigned long addr,
+ unsigned long end,
+ pte_fn_t fn, void *data)
+{
+ pte_t *pte;
+ int err;
+ struct page *pte_page;
+
+ pte = (mm == &init_mm) ?
+ pte_alloc_kernel(mm, pmd, addr) :
+ pte_alloc_map(mm, pmd, addr);
+ if (!pte)
+ return -ENOMEM;
+
+ pte_page = pmd_page(*pmd);
+
+ do {
+ err = fn(pte, pte_page, addr, data);
+ if (err)
+ break;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+
+ if (mm != &init_mm)
+ pte_unmap(pte-1);
+ return err;
+
+}
+
+static inline int generic_pmd_range(struct mm_struct *mm,
+ pud_t *pud,
+ unsigned long addr,
+ unsigned long end,
+ pte_fn_t fn, void *data)
+{
+ pmd_t *pmd;
+ unsigned long next;
+ int err;
+
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
+ do {
+ next = pmd_addr_end(addr, end);
+ err = generic_pte_range(mm, pmd, addr, next, fn, data);
+ if (err)
+ break;
+ } while (pmd++, addr = next, addr != end);
+ return err;
+}
+
+static inline int generic_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr,
+ unsigned long end,
+ pte_fn_t fn, void *data)
+{
+ pud_t *pud;
+ unsigned long next;
+ int err;
+
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+ err = generic_pmd_range(mm, pud, addr, next, fn, data);
+ if (err)
+ break;
+ } while (pud++, addr = next, addr != end);
+ return err;
+}
+
+/*
+ * Scan a region of virtual memory, filling in page tables as necessary
+ * and calling a provided function on each leaf page table entry.
+ */
+int generic_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn, void *data)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ unsigned long end = addr + size;
+ int err;
+
+ BUG_ON(addr >= end);
+ pgd = pgd_offset(mm, addr);
+ spin_lock(&mm->page_table_lock);
+ do {
+ next = pgd_addr_end(addr, end);
+ err = generic_pud_range(mm, pgd, addr, next, fn, data);
+ if (err)
+ break;
+ } while (pgd++, addr = next, addr != end);
+ spin_unlock(&mm->page_table_lock);
+ return err;
+}
+
/*
* Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when
* servicing faults for write access. In the normal case, do always want
diff -r 5f1ed597f107 -r 8799d14bef77 tools/Makefile
--- a/tools/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/Makefile Thu Aug 25 22:53:20 2005
@@ -13,7 +13,8 @@
#SUBDIRS += pygrub
SUBDIRS += firmware
SUBDIRS += security
-#SUBDIRS += consoled
+SUBDIRS += console
+SUBDIRS += xenstat
.PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean
diff -r 5f1ed597f107 -r 8799d14bef77 tools/Rules.mk
--- a/tools/Rules.mk Wed Aug 24 02:43:18 2005
+++ b/tools/Rules.mk Thu Aug 25 22:53:20 2005
@@ -6,6 +6,7 @@
XEN_LIBXC = $(XEN_ROOT)/tools/libxc
XEN_XCS = $(XEN_ROOT)/tools/xcs
XEN_XENSTORE = $(XEN_ROOT)/tools/xenstore
+XEN_LIBXENSTAT = $(XEN_ROOT)/tools/xenstat/libxenstat/src
ifeq ($(XEN_TARGET_ARCH),x86_32)
CFLAGS += -m32 -march=i686
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/blktaplib.c
--- a/tools/blktap/blktaplib.c Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/blktaplib.c Thu Aug 25 22:53:20 2005
@@ -34,7 +34,7 @@
#else
#define DPRINTF(_f, _a...) ((void)0)
#endif
-#define DEBUG_RING_IDXS 0
+#define DEBUG_RING_IDXS 1
#define POLLRDNORM 0x040
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/blktaplib.h
--- a/tools/blktap/blktaplib.h Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/blktaplib.h Thu Aug 25 22:53:20 2005
@@ -7,7 +7,7 @@
#ifndef __BLKTAPLIB_H__
#define __BLKTAPLIB_H__
-#include <xc.h>
+#include <xenctrl.h>
#include <sys/user.h>
#include <xen/xen.h>
#include <xen/io/blkif.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/parallax/block-async.h
--- a/tools/blktap/parallax/block-async.h Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/parallax/block-async.h Thu Aug 25 22:53:20 2005
@@ -7,7 +7,7 @@
#define _BLOCKASYNC_H_
#include <assert.h>
-#include <xc.h>
+#include <xenctrl.h>
#include "vdi.h"
struct io_ret
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/parallax/blockstore.h
--- a/tools/blktap/parallax/blockstore.h Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/parallax/blockstore.h Thu Aug 25 22:53:20 2005
@@ -10,7 +10,7 @@
#define __BLOCKSTORE_H__
#include <netinet/in.h>
-#include <xc.h>
+#include <xenctrl.h>
#define BLOCK_SIZE 4096
#define BLOCK_SHIFT 12
diff -r 5f1ed597f107 -r 8799d14bef77
tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
Thu Aug 25 22:53:20 2005
@@ -35,7 +35,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
-#include <xc.h>
+#include <xenctrl.h>
#define TRACE_ENTER /* printf("enter %s\n", __FUNCTION__) */
long (*myptrace)(enum __ptrace_request, pid_t, long, long);
int (*myxcwait)(int domain, int *status, int options) ;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/libxendebug/Makefile
--- a/tools/debugger/libxendebug/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/libxendebug/Makefile Thu Aug 25 22:53:20 2005
@@ -20,7 +20,7 @@
CFLAGS += -Wp,-MD,.$(@F).d
DEPS = .*.d
-LDFLAGS += -L$(XEN_ROOT)/tools/libxc -lxc
+LDFLAGS += -L$(XEN_ROOT)/tools/libxc -lxenctrl
LIB_OBJS := $(patsubst %.c,%.o,$(SRCS))
PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS))
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/libxendebug/xendebug.c Thu Aug 25 22:53:20 2005
@@ -12,7 +12,7 @@
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
-#include <xc.h>
+#include <xenctrl.h>
#include "list.h"
#if defined(__i386__)
@@ -40,7 +40,7 @@
typedef struct bwcpoint /* break/watch/catch point */
{
struct list_head list;
- memory_t address;
+ unsigned long address;
u32 domain;
u8 old_value; /* old value for software bkpt */
} bwcpoint_t, *bwcpoint_p;
@@ -311,7 +311,7 @@
/* access to one page */
static int
xendebug_memory_page (domain_context_p ctxt, int xc_handle, u32 vcpu,
- int protection, memory_t address, int length, u8 *buffer)
+ int protection, unsigned long address, int length, u8
*buffer)
{
vcpu_guest_context_t *vcpu_ctxt = &ctxt->context[vcpu];
unsigned long pde, page;
@@ -407,7 +407,7 @@
/* divide a memory operation into accesses to individual pages */
static int
xendebug_memory_op (domain_context_p ctxt, int xc_handle, u32 vcpu,
- int protection, memory_t address, int length, u8 *buffer)
+ int protection, unsigned long address, int length, u8
*buffer)
{
int remain; /* number of bytes to touch past this page */
int bytes = 0;
@@ -431,7 +431,7 @@
xendebug_read_memory(int xc_handle,
u32 domid,
u32 vcpu,
- memory_t address,
+ unsigned long address,
u32 length,
u8 *data)
{
@@ -451,7 +451,7 @@
xendebug_write_memory(int xc_handle,
u32 domid,
u32 vcpu,
- memory_t address,
+ unsigned long address,
u32 length,
u8 *data)
{
@@ -471,7 +471,7 @@
xendebug_insert_memory_breakpoint(int xc_handle,
u32 domid,
u32 vcpu,
- memory_t address,
+ unsigned long address,
u32 length)
{
bwcpoint_p bkpt;
@@ -517,7 +517,7 @@
xendebug_remove_memory_breakpoint(int xc_handle,
u32 domid,
u32 vcpu,
- memory_t address,
+ unsigned long address,
u32 length)
{
bwcpoint_p bkpt = NULL;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/libxendebug/xendebug.h
--- a/tools/debugger/libxendebug/xendebug.h Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/libxendebug/xendebug.h Thu Aug 25 22:53:20 2005
@@ -9,7 +9,7 @@
#ifndef _XENDEBUG_H_DEFINED
#define _XENDEBUG_H_DEFINED
-#include <xc.h>
+#include <xenctrl.h>
int xendebug_attach(int xc_handle,
u32 domid,
@@ -45,7 +45,7 @@
int xendebug_read_memory(int xc_handle,
u32 domid,
u32 vcpu,
- memory_t address,
+ unsigned long address,
u32 length,
u8 *data);
@@ -53,7 +53,7 @@
int xendebug_write_memory(int xc_handle,
u32 domid,
u32 vcpu,
- memory_t address,
+ unsigned long address,
u32 length,
u8 *data);
@@ -61,13 +61,13 @@
int xendebug_insert_memory_breakpoint(int xc_handle,
u32 domid,
u32 vcpu,
- memory_t address,
+ unsigned long address,
u32 length);
int xendebug_remove_memory_breakpoint(int xc_handle,
u32 domid,
u32 vcpu,
- memory_t address,
+ unsigned long address,
u32 length);
int xendebug_query_domain_stop(int xc_handle,
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Domain.ml
--- a/tools/debugger/pdb/Domain.ml Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Domain.ml Thu Aug 25 22:53:20 2005
@@ -36,6 +36,7 @@
Printf.sprintf "{domain} domain: %d, vcpu: %d"
ctx.domain ctx.vcpu
+external read_register : context_t -> int -> int32 = "dom_read_register"
external read_registers : context_t -> registers = "dom_read_registers"
external write_register : context_t -> register -> int32 -> unit =
"dom_write_register"
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Domain.mli
--- a/tools/debugger/pdb/Domain.mli Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Domain.mli Thu Aug 25 22:53:20 2005
@@ -22,6 +22,7 @@
val string_of_context : context_t -> string
+val read_register : context_t -> int -> int32
val read_registers : context_t -> registers
val write_register : context_t -> register -> int32 -> unit
val read_memory : context_t -> int32 -> int -> int list
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Makefile
--- a/tools/debugger/pdb/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Makefile Thu Aug 25 22:53:20 2005
@@ -33,7 +33,8 @@
LIBS += unix str
# bc = byte-code, dc = debug byte-code
-all : patches dc
+# patches = patch linux domU source code
+all : dc
SOURCES += pdb_caml_xc.c
SOURCES += pdb_caml_domain.c pdb_caml_process.c
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/PDB.ml
--- a/tools/debugger/pdb/PDB.ml Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/PDB.ml Thu Aug 25 22:53:20 2005
@@ -219,6 +219,17 @@
(***************************************************************************)
+let read_register ctx register = (* register: int index (from the gdb p%x scan); the result is the int32 *)
+ match ctx with
+ | Void -> 0l (* default for startup *)
+ | Domain d -> Domain.read_register d register
+ | Process p ->
+ begin
+ Process.read_register p register;
+ raise No_reply (* Process.read_register returns unit, so no value is produced here *)
+ end
+ | _ -> raise (Unimplemented "read registers")
+
let read_registers ctx =
match ctx with
| Void -> Intel.null_registers (* default for startup *)
@@ -278,14 +289,42 @@
let insert_memory_breakpoint ctx addr len =
match ctx with
| Domain d -> Domain.insert_memory_breakpoint d addr len
- | Process p -> Process.insert_memory_breakpoint p addr len
+ | Process p ->
+ begin
+ Process.insert_memory_breakpoint p addr len;
+ raise No_reply
+ end
| _ -> raise (Unimplemented "insert memory breakpoint")
let remove_memory_breakpoint ctx addr len =
match ctx with
| Domain d -> Domain.remove_memory_breakpoint d addr len
- | Process p -> Process.remove_memory_breakpoint p addr len
+ | Process p ->
+ begin
+ Process.remove_memory_breakpoint p addr len;
+ raise No_reply
+ end
| _ -> raise (Unimplemented "remove memory breakpoint")
+
+(* Insert a watchpoint.  Only Process contexts are handled: kind, addr and
+   len are handed to Process.insert_watchpoint and No_reply is raised
+   afterwards.  Any other context raises Unimplemented. *)
+let insert_watchpoint ctx kind addr len =
+  match ctx with
+  (* TODO: | Domain d -> Domain.insert_watchpoint d kind addr len *)
+  | Process proc ->
+      ( Process.insert_watchpoint proc kind addr len;
+        raise No_reply )
+  | _ -> raise (Unimplemented "insert watchpoint")
+
+(* Remove a watchpoint.  Only Process contexts are handled: kind, addr and
+   len are handed to Process.remove_watchpoint and No_reply is raised
+   afterwards.  Any other context raises Unimplemented. *)
+let remove_watchpoint ctx kind addr len =
+  match ctx with
+  (* TODO: | Domain d -> Domain.remove_watchpoint d kind addr len *)
+  | Process proc ->
+      ( Process.remove_watchpoint proc kind addr len;
+        raise No_reply )
+  | _ -> raise (Unimplemented "remove watchpoint")
let pause ctx =
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Process.ml
--- a/tools/debugger/pdb/Process.ml Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Process.ml Thu Aug 25 22:53:20 2005
@@ -54,6 +54,7 @@
proc_ctx.ring <- Xen_domain.get_ring dom_ctx;
_attach_debugger proc_ctx
+external read_register : context_t -> int -> unit = "proc_read_register"
external read_registers : context_t -> unit = "proc_read_registers"
external write_register : context_t -> register -> int32 -> unit =
"proc_write_register"
@@ -69,6 +70,10 @@
"proc_insert_memory_breakpoint"
external remove_memory_breakpoint : context_t -> int32 -> int -> unit =
"proc_remove_memory_breakpoint"
+external insert_watchpoint : context_t -> int -> int32 -> int -> unit =
+ "proc_insert_watchpoint"
+external remove_watchpoint : context_t -> int -> int32 -> int -> unit =
+ "proc_remove_watchpoint"
let pause ctx =
pause_target ctx
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Process.mli
--- a/tools/debugger/pdb/Process.mli Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Process.mli Thu Aug 25 22:53:20 2005
@@ -26,7 +26,7 @@
val detach_debugger : context_t -> unit
val pause : context_t -> unit
-
+val read_register : context_t -> int -> unit
val read_registers : context_t -> unit
val write_register : context_t -> register -> int32 -> unit
val read_memory : context_t -> int32 -> int -> unit
@@ -37,3 +37,5 @@
val insert_memory_breakpoint : context_t -> int32 -> int -> unit
val remove_memory_breakpoint : context_t -> int32 -> int -> unit
+val insert_watchpoint : context_t -> int -> int32 -> int -> unit
+val remove_watchpoint : context_t -> int -> int32 -> int -> unit
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/debugger.ml
--- a/tools/debugger/pdb/debugger.ml Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/debugger.ml Thu Aug 25 22:53:20 2005
@@ -53,10 +53,20 @@
PDB.step ctx;
raise No_reply
+(**
+ Read Register Command.
+ Scans the command as the letter p followed by a hexadecimal register
+ number, reads that register through PDB.read_register, and returns it
+ as a 4-byte value: the result of Util.flip_int32 printed as 8 hex digits.
+ *)
+let gdb_read_register ctx command =
+ let read_reg register =
+ (Printf.sprintf "%08lx" (Util.flip_int32 (PDB.read_register ctx register)))
+ in
+ Scanf.sscanf command "p%x" read_reg
+
(**
Read Registers Command.
- returns 16 4-byte registers in a particular defined by gdb.
+ returns 16 4-byte registers in a particular format defined by gdb.
*)
let gdb_read_registers ctx =
let regs = PDB.read_registers ctx in
@@ -100,7 +110,7 @@
with
Failure s -> "E02"
in
- Scanf.sscanf command "m%lx,%d" read_mem
+ Scanf.sscanf command "m%lx,%x" read_mem
@@ -218,16 +228,24 @@
(**
Insert Breakpoint or Watchpoint Packet
*)
+
+let bwc_watch_write = 102 (* from pdb_module.h *)
+let bwc_watch_read = 103
+let bwc_watch_access = 104
+
let gdb_insert_bwcpoint ctx command =
let insert cmd addr length =
try
match cmd with
| 0 -> PDB.insert_memory_breakpoint ctx addr length; "OK"
+ | 2 -> PDB.insert_watchpoint ctx bwc_watch_write addr length; "OK"
+ | 3 -> PDB.insert_watchpoint ctx bwc_watch_read addr length; "OK"
+ | 4 -> PDB.insert_watchpoint ctx bwc_watch_access addr length; "OK"
| _ -> ""
with
Failure s -> "E03"
in
- Scanf.sscanf command "Z%d,%lx,%d" insert
+ Scanf.sscanf command "Z%d,%lx,%x" insert
(**
Remove Breakpoint or Watchpoint Packet
@@ -237,6 +255,9 @@
try
match cmd with
| 0 -> PDB.remove_memory_breakpoint ctx addr length; "OK"
+ | 2 -> PDB.remove_watchpoint ctx bwc_watch_write addr length; "OK"
+ | 3 -> PDB.remove_watchpoint ctx bwc_watch_read addr length; "OK"
+ | 4 -> PDB.remove_watchpoint ctx bwc_watch_access addr length; "OK"
| _ -> ""
with
Failure s -> "E04"
@@ -260,6 +281,7 @@
| 'k' -> gdb_kill ()
| 'm' -> gdb_read_memory ctx command
| 'M' -> gdb_write_memory ctx command
+ | 'p' -> gdb_read_register ctx command
| 'P' -> gdb_write_register ctx command
| 'q' -> gdb_query command
| 's' -> gdb_step ctx
@@ -270,7 +292,7 @@
| 'Z' -> gdb_insert_bwcpoint ctx command
| _ ->
print_endline (Printf.sprintf "unknown gdb command [%s]" command);
- "E02"
+ ""
with
Unimplemented s ->
print_endline (Printf.sprintf "loser. unimplemented command [%s][%s]"
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/linux-2.6-module/debug.c
--- a/tools/debugger/pdb/linux-2.6-module/debug.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-module/debug.c Thu Aug 25 22:53:20 2005
@@ -9,33 +9,143 @@
#include <asm-i386/kdebug.h>
#include <asm-xen/asm-i386/processor.h>
#include <asm-xen/asm-i386/ptrace.h>
+#include <asm-xen/asm-i386/tlbflush.h>
#include <asm-xen/xen-public/xen.h>
#include "pdb_module.h"
#include "pdb_debug.h"
-#define BWC_DEBUG 1
-#define BWC_INT3 3
+
+static int pdb_debug_fn (struct pt_regs *regs, long error_code,
+ unsigned int condition);
+static int pdb_int3_fn (struct pt_regs *regs, long error_code);
+static int pdb_page_fault_fn (struct pt_regs *regs, long error_code,
+ unsigned int condition);
+
+/***********************************************************************/
+
typedef struct bwcpoint /* break/watch/catch point */
{
struct list_head list;
- memory_t address;
- u32 domain;
+ unsigned long address;
+ int length;
+
+ u8 type; /* BWC_??? */
+ u8 mode; /* for BWC_PAGE, the current protection mode */
u32 process;
- u8 old_value; /* old value for software bkpt */
- u8 type; /* BWC_??? */
+ u8 error; /* error occurred when enabling: don't disable. */
+
+ /* original values */
+ u8 orig_bkpt; /* single byte breakpoint */
+ pte_t orig_pte;
+
+ struct list_head watchpt_read_list; /* read watchpoints on this page */
+ struct list_head watchpt_write_list; /* write */
+ struct list_head watchpt_access_list; /* access */
+ struct list_head watchpt_disabled_list; /* disabled */
+
+ struct bwcpoint *parent; /* watchpoint: bwc_watch (the page) */
+ struct bwcpoint *watchpoint; /* bwc_watch_step: original watchpoint */
} bwcpoint_t, *bwcpoint_p;
-static bwcpoint_t bwcpoint_list;
+static struct list_head bwcpoint_list = LIST_HEAD_INIT(bwcpoint_list);
+
+#define _pdb_bwcpoint_alloc(_var) \
+{ \
+ if ( (_var = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL)) == NULL ) \
+ printk("error: unable to allocate memory %d\n", __LINE__); \
+ else { \
+ memset(_var, 0, sizeof(bwcpoint_t)); \
+ INIT_LIST_HEAD(&_var->watchpt_read_list); \
+ INIT_LIST_HEAD(&_var->watchpt_write_list); \
+ INIT_LIST_HEAD(&_var->watchpt_access_list); \
+ INIT_LIST_HEAD(&_var->watchpt_disabled_list); \
+ } \
+}
+
+/***********************************************************************/
+
+static void _pdb_bwc_print_list (struct list_head *, char *, int);
+
+/*
+ * Print one bwcpoint (label, type, address, length and an error marker),
+ * then each of its non-empty watchpoint sub-lists in the order read,
+ * write, access, disabled, tagged "r", "w", "a" and "d" respectively.
+ */
+static void
+_pdb_bwc_print (bwcpoint_p bwc, char *label, int level)
+{
+    struct list_head *sublist[4];
+    char *tag[4] = { "r", "w", "a", "d" };
+    char error_mark = '-';
+    int i;
+
+    if ( bwc->error )
+        error_mark = 'e';
+
+    printk("%s%03d 0x%08lx:0x%02x %c\n", label, bwc->type,
+           bwc->address, bwc->length, error_mark);
+
+    sublist[0] = &bwc->watchpt_read_list;
+    sublist[1] = &bwc->watchpt_write_list;
+    sublist[2] = &bwc->watchpt_access_list;
+    sublist[3] = &bwc->watchpt_disabled_list;
+
+    for ( i = 0; i < 4; i++ )
+    {
+        if ( list_empty(sublist[i]) )
+            continue;
+        _pdb_bwc_print_list(sublist[i], tag[i], level);
+    }
+}
+
+/*
+ * Print every bwcpoint on bwc_list, numbering the entries from 0 and
+ * printing each one (and its sub-lists) through _pdb_bwc_print() at
+ * level + 1.  The padding around the index differs between the top level
+ * (level 0) and nested levels.  An empty list is reported as such.
+ */
+static void
+_pdb_bwc_print_list (struct list_head *bwc_list, char *label, int level)
+{
+    struct list_head *pos;
+    int index = 0;
+    int nested = (level > 0);
+
+    for ( pos = bwc_list->next; pos != bwc_list; pos = pos->next )
+    {
+        bwcpoint_p item = list_entry(pos, bwcpoint_t, list);
+
+        printk(" %s[%02d]%s ", nested ? " " : "", index,
+               nested ? "" : " ");
+        index++;
+        _pdb_bwc_print(item, label, level+1);
+    }
+
+    if ( index != 0 )
+        return;
+
+    printk(" empty list\n");
+}
void
-pdb_initialize_bwcpoint (void)
-{
- memset((void *) &bwcpoint_list, 0, sizeof(bwcpoint_t));
- INIT_LIST_HEAD(&bwcpoint_list.list);
-
- return;
-}
-
+pdb_bwc_print_list (void)
+{
+ _pdb_bwc_print_list(&bwcpoint_list, " ", 0);
+}
+
+/*
+ * Find the watchpoint of @process whose [address, address + length) range
+ * contains @address.
+ *
+ * bwcpoint_list is scanned for the entry whose address equals the page
+ * containing @address; that entry's read, write and access lists are then
+ * searched in that order.  Entries on the disabled list are not searched.
+ * Returns the matching entry, or NULL when there is none.
+ */
+bwcpoint_p
+pdb_search_watchpoint (u32 process, unsigned long address)
+{
+    bwcpoint_p bwc_watch = (bwcpoint_p) 0;
+    bwcpoint_p bwc_entry = (bwcpoint_p) 0;
+    struct list_head *ptr;
+
+    list_for_each(ptr, &bwcpoint_list)              /* find bwc page entry */
+    {
+        bwc_watch = list_entry(ptr, bwcpoint_t, list);
+        if (bwc_watch->address == (address & PAGE_MASK)) break;
+    }
+
+    /*
+     * list_for_each() leaves ptr at the list head when it runs to
+     * completion.  Testing bwc_watch for NULL is not sufficient: after an
+     * unsuccessful scan of a non-empty list it still points at the last
+     * entry visited, which is not the page we were looking for.
+     */
+    if ( ptr == &bwcpoint_list )
+    {
+        return (bwcpoint_p) 0;
+    }
+
+#define __pdb_search_watchpoint_list(__list)                             \
+    list_for_each(ptr, (__list))                                         \
+    {                                                                    \
+        bwc_entry = list_entry(ptr, bwcpoint_t, list);                   \
+        if ( bwc_entry->process == process &&                            \
+             bwc_entry->address <= address &&                            \
+             bwc_entry->address + bwc_entry->length > address )          \
+            return bwc_entry;                                            \
+    }
+
+    __pdb_search_watchpoint_list(&bwc_watch->watchpt_read_list);
+    __pdb_search_watchpoint_list(&bwc_watch->watchpt_write_list);
+    __pdb_search_watchpoint_list(&bwc_watch->watchpt_access_list);
+
+#undef __pdb_search_watchpoint_list
+
+    return (bwcpoint_p) 0;
+}
+
+/*************************************************************/
int
pdb_suspend (struct task_struct *target)
@@ -134,6 +244,35 @@
*(unsigned long *) stack = value;
return;
+}
+
+/*
+ * Read a single register of @target.  op->reg selects the register using
+ * the numbering of the switch below (0 = EAX ... 15 = GS) and the value is
+ * stored in op->value.
+ *
+ * Returns 0 on success.  For an unrecognised register number op->value is
+ * set to 0 and -1 is returned, instead of reporting success while leaving
+ * op->value untouched.
+ */
+int
+pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op)
+{
+    int rc = 0;
+
+    switch (op->reg)
+    {
+    case  0: op->value = _pdb_get_register(target, LINUX_EAX); break;
+    case  1: op->value = _pdb_get_register(target, LINUX_ECX); break;
+    case  2: op->value = _pdb_get_register(target, LINUX_EDX); break;
+    case  3: op->value = _pdb_get_register(target, LINUX_EBX); break;
+    case  4: op->value = _pdb_get_register(target, LINUX_ESP); break;
+    case  5: op->value = _pdb_get_register(target, LINUX_EBP); break;
+    case  6: op->value = _pdb_get_register(target, LINUX_ESI); break;
+    case  7: op->value = _pdb_get_register(target, LINUX_EDI); break;
+    case  8: op->value = _pdb_get_register(target, LINUX_EIP); break;
+    case  9: op->value = _pdb_get_register(target, LINUX_EFL); break;
+
+    case 10: op->value = _pdb_get_register(target, LINUX_CS); break;
+    case 11: op->value = _pdb_get_register(target, LINUX_SS); break;
+    case 12: op->value = _pdb_get_register(target, LINUX_DS); break;
+    case 13: op->value = _pdb_get_register(target, LINUX_ES); break;
+    case 14: op->value = _pdb_get_register(target, LINUX_FS); break;
+    case 15: op->value = _pdb_get_register(target, LINUX_GS); break;
+
+    default:                       /* not a register this switch knows */
+        op->value = 0;
+        rc = -1;
+        break;
+    }
+
+    return rc;
}
int
@@ -209,18 +348,14 @@
eflags |= X86_EFLAGS_TF;
_pdb_set_register(target, LINUX_EFL, eflags);
- bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL);
- if ( bkpt == NULL )
- {
- printk("error: unable to allocation memory\n");
- return -1;
- }
+ _pdb_bwcpoint_alloc(bkpt);
+ if ( bkpt == NULL ) return -1;
bkpt->process = target->pid;
bkpt->address = 0;
bkpt->type = BWC_DEBUG;
- list_add(&bkpt->list, &bwcpoint_list.list);
+ list_add_tail(&bkpt->list, &bwcpoint_list);
wake_up_process(target);
@@ -229,7 +364,7 @@
int
pdb_insert_memory_breakpoint (struct task_struct *target,
- memory_t address, u32 length)
+ unsigned long address, u32 length)
{
int rc = 0;
bwcpoint_p bkpt;
@@ -237,38 +372,34 @@
printk("insert breakpoint %d:%lx len: %d\n", target->pid, address, length);
- bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL);
- if ( bkpt == NULL )
- {
- printk("error: unable to allocation memory\n");
+ if ( length != 1 )
+ {
+ printk("error: breakpoint length should be 1\n");
return -1;
}
- if ( length != 1 )
- {
- printk("error: breakpoint length should be 1\n");
- kfree(bkpt);
- return -1;
- }
+ _pdb_bwcpoint_alloc(bkpt);
+ if ( bkpt == NULL ) return -1;
bkpt->process = target->pid;
bkpt->address = address;
bkpt->type = BWC_INT3;
- pdb_access_memory(target, address, &bkpt->old_value, 1, 0);
- pdb_access_memory(target, address, &breakpoint_opcode, 1, 1);
+ pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_READ);
+ pdb_access_memory(target, address, &breakpoint_opcode, 1, PDB_MEM_WRITE);
- list_add(&bkpt->list, &bwcpoint_list.list);
+ list_add_tail(&bkpt->list, &bwcpoint_list);
printk("breakpoint_set %d:%lx OLD: 0x%x\n",
- target->pid, address, bkpt->old_value);
+ target->pid, address, bkpt->orig_bkpt);
+ pdb_bwc_print_list();
return rc;
}
int
pdb_remove_memory_breakpoint (struct task_struct *target,
- memory_t address, u32 length)
+ unsigned long address, u32 length)
{
int rc = 0;
bwcpoint_p bkpt = NULL;
@@ -276,7 +407,7 @@
printk ("remove breakpoint %d:%lx\n", target->pid, address);
struct list_head *entry;
- list_for_each(entry, &bwcpoint_list.list)
+ list_for_each(entry, &bwcpoint_list)
{
bkpt = list_entry(entry, bwcpoint_t, list);
if ( target->pid == bkpt->process &&
@@ -285,17 +416,223 @@
break;
}
- if (bkpt == &bwcpoint_list || bkpt == NULL)
+ if (entry == &bwcpoint_list)
{
printk ("error: no breakpoint found\n");
return -1;
}
+ pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_WRITE);
+
list_del(&bkpt->list);
-
- pdb_access_memory(target, address, &bkpt->old_value, 1, 1);
-
kfree(bkpt);
+
+ pdb_bwc_print_list();
+
+ return rc;
+}
+
+#define PDB_PTE_UPDATE 1 /* recompute the pte from the page's watchpoint lists */
+#define PDB_PTE_RESTORE 2 /* write the saved original pte back */
+/*
+ * Change the page table entry that maps bwc->address in target->mm.
+ *
+ * The pgd/pud/pmd/pte levels are looked up in turn; a missing or bad level
+ * ends the call early with -1, -2, -3 or -4 respectively.  Otherwise the
+ * pte is handled according to mode and 0 is returned (rc is never changed
+ * after the lookup succeeds, including for an unknown mode, which is only
+ * logged).
+ *
+ * bwc->parent is dereferenced in both modes, so bwc must be a watchpoint
+ * entry whose parent (the per-page entry holding orig_pte and the
+ * watchpoint lists) has been set by the caller.
+ *
+ * NOTE(review): no locking is visible in this function; confirm whether
+ * callers are expected to hold mm->page_table_lock.
+ */
+int
+pdb_change_pte (struct task_struct *target, bwcpoint_p bwc, int mode)
+{
+ int rc = 0;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *ptep;
+
+ pgd = pgd_offset(target->mm, bwc->address);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return -1;
+
+ pud = pud_offset(pgd, bwc->address);
+ if (pud_none(*pud) || unlikely(pud_bad(*pud))) return -2;
+
+ pmd = pmd_offset(pud, bwc->address);
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) return -3;
+
+ ptep = pte_offset_map(pmd, bwc->address);
+ if (!ptep) return -4;
+
+ switch ( mode )
+ {
+ case PDB_PTE_UPDATE: /* added or removed a watchpoint. update pte. */
+ {
+ pte_t new_pte;
+
+ if ( pte_val(bwc->parent->orig_pte) == 0 ) /* new watchpoint page */
+ {
+ bwc->parent->orig_pte = *ptep; /* remember the pte as it was before any protection change */
+ }
+
+ new_pte = bwc->parent->orig_pte; /* always start from the saved original */
+
+ if ( !list_empty(&bwc->parent->watchpt_read_list) ||
+ !list_empty(&bwc->parent->watchpt_access_list) )
+ {
+ new_pte = pte_rdprotect(new_pte); /* read or access watchpoints present */
+ }
+
+ if ( !list_empty(&bwc->parent->watchpt_write_list) ||
+ !list_empty(&bwc->parent->watchpt_access_list) )
+ {
+ new_pte = pte_wrprotect(new_pte); /* write or access watchpoints present */
+ }
+
+ if ( pte_val(new_pte) != pte_val(*ptep) ) /* only write and flush when something changed */
+ {
+ *ptep = new_pte;
+ flush_tlb_mm(target->mm);
+ }
+ break;
+ }
+ case PDB_PTE_RESTORE : /* suspend watchpoint by restoring original pte */
+ {
+ *ptep = bwc->parent->orig_pte;
+ flush_tlb_mm(target->mm);
+ break;
+ }
+ default :
+ {
+ printk("(linux) unknown mode %d %d\n", mode, __LINE__);
+ break;
+ }
+ }
+
+ pte_unmap(ptep); /* can i flush the tlb before pte_unmap? */
+
+ return rc;
+}
+
+/*
+ * Add the watchpoint described by @watchpt (type, address, length) for
+ * @target.
+ *
+ * Watchpoints are grouped per page: bwcpoint_list holds one page entry
+ * (type BWC_WATCH, address = page base) and each watchpoint hangs off that
+ * entry's read, write or access list.  The page entry is created on first
+ * use.  After linking the new watchpoint the page's pte is recomputed with
+ * pdb_change_pte(PDB_PTE_UPDATE).
+ *
+ * The type is validated and the watchpoint entry allocated before the page
+ * entry is created, so no failure path leaves an empty page entry linked
+ * in bwcpoint_list.
+ *
+ * Returns 0 on success, -1 if memory cannot be allocated, -2 if
+ * watchpt->type is not BWC_WATCH_READ, BWC_WATCH_WRITE or BWC_WATCH_ACCESS.
+ *
+ * NOTE(review): the result of pdb_change_pte() is ignored, as before; a
+ * watchpoint on an unmapped address is recorded but not armed.
+ */
+int
+pdb_insert_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt)
+{
+    int rc = 0;
+
+    bwcpoint_p bwc_watch;
+    bwcpoint_p bwc_entry;
+    struct list_head *ptr;
+    unsigned long page = watchpt->address & PAGE_MASK;
+    struct list_head *watchpoint_list;
+
+    printk("insert watchpoint: %d %x %x\n",
+           watchpt->type, watchpt->address, watchpt->length);
+
+    switch (watchpt->type)           /* reject bad types before allocating */
+    {
+    case BWC_WATCH_READ:
+    case BWC_WATCH_WRITE:
+    case BWC_WATCH_ACCESS:
+        break;
+    default:
+        printk("unknown type %d\n", watchpt->type); return -2;
+    }
+
+    _pdb_bwcpoint_alloc(bwc_entry);                /* create new bwc:entry */
+    if ( bwc_entry == NULL ) return -1;
+
+    list_for_each(ptr, &bwcpoint_list)     /* find existing bwc page entry */
+    {
+        bwc_watch = list_entry(ptr, bwcpoint_t, list);
+
+        if (bwc_watch->address == page) goto got_bwc_watch;
+    }
+
+    _pdb_bwcpoint_alloc(bwc_watch);                /* create new bwc:watch */
+    if ( bwc_watch == NULL )
+    {
+        kfree(bwc_entry);               /* nothing has been linked yet */
+        return -1;
+    }
+
+    bwc_watch->type = BWC_WATCH;
+    bwc_watch->process = target->pid;
+    bwc_watch->address = page;
+
+    list_add_tail(&bwc_watch->list, &bwcpoint_list);
+
+ got_bwc_watch:
+
+    switch (watchpt->type)
+    {
+    case BWC_WATCH_READ:
+        watchpoint_list = &bwc_watch->watchpt_read_list; break;
+    case BWC_WATCH_WRITE:
+        watchpoint_list = &bwc_watch->watchpt_write_list; break;
+    default:          /* BWC_WATCH_ACCESS: other values were rejected above */
+        watchpoint_list = &bwc_watch->watchpt_access_list; break;
+    }
+
+    bwc_entry->process = target->pid;
+    bwc_entry->address = watchpt->address;
+    bwc_entry->length = watchpt->length;
+    bwc_entry->type = watchpt->type;
+    bwc_entry->parent = bwc_watch;
+
+    list_add_tail(&bwc_entry->list, watchpoint_list);
+    pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE);
+
+    pdb_bwc_print_list();
+
+    return rc;
+}
+
+/*
+ * Remove the watchpoint described by @watchpt (type, address, length).
+ *
+ * The page entry for watchpt->address is looked up in bwcpoint_list, then
+ * the watchpoint with the same address and length is looked up on the list
+ * selected by watchpt->type.  The watchpoint is unlinked, the page's pte is
+ * recomputed with pdb_change_pte(PDB_PTE_UPDATE) and the entry is freed.
+ * When the page entry has no watchpoints left on any of its lists it is
+ * unlinked and freed as well.
+ *
+ * Returns 0 on success, -1 if the page entry or the watchpoint cannot be
+ * found, -2 for an unknown watchpt->type.
+ */
+int
+pdb_remove_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt)
+{
+    int rc = 0;
+    bwcpoint_p bwc_watch = (bwcpoint_p) NULL;
+    bwcpoint_p bwc_entry = (bwcpoint_p) NULL;
+    unsigned long page = watchpt->address & PAGE_MASK;
+    struct list_head *ptr;
+    struct list_head *watchpoint_list;
+
+    printk("remove watchpoint: %d %x %x\n",
+           watchpt->type, watchpt->address, watchpt->length);
+
+    list_for_each(ptr, &bwcpoint_list)               /* find bwc page entry */
+    {
+        bwc_watch = list_entry(ptr, bwcpoint_t, list);
+        if (bwc_watch->address == page) break;
+    }
+
+    /*
+     * A completed list_for_each() leaves ptr at the list head.  The cursor
+     * variable itself is not NULL after a failed scan of a non-empty list
+     * (it points at the last entry visited), so it cannot be used to detect
+     * "not found".
+     */
+    if ( ptr == &bwcpoint_list )
+    {
+        printk("(linux) delete watchpoint: can't find bwc page 0x%08x\n",
+               watchpt->address);
+        return -1;
+    }
+
+    switch (watchpt->type)
+    {
+    case BWC_WATCH_READ:
+        watchpoint_list = &bwc_watch->watchpt_read_list; break;
+    case BWC_WATCH_WRITE:
+        watchpoint_list = &bwc_watch->watchpt_write_list; break;
+    case BWC_WATCH_ACCESS:
+        watchpoint_list = &bwc_watch->watchpt_access_list; break;
+    default:
+        printk("unknown type %d\n", watchpt->type); return -2;
+    }
+
+    list_for_each(ptr, watchpoint_list)                  /* find watchpoint */
+    {
+        bwc_entry = list_entry(ptr, bwcpoint_t, list);
+        if ( bwc_entry->address == watchpt->address &&
+             bwc_entry->length == watchpt->length ) break;
+    }
+
+    if ( ptr == watchpoint_list )      /* ran off the end: no such entry */
+    {
+        printk("(linux) delete watchpoint: can't find watchpoint 0x%08x\n",
+               watchpt->address);
+        return -1;
+    }
+
+    list_del(&bwc_entry->list);
+    pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE);  /* uses bwc_entry->parent: must precede kfree */
+    kfree(bwc_entry);
+
+    /*
+     * Drop the page entry once nothing refers to it.  The disabled list is
+     * checked too: a watchpoint parked there still points at this page
+     * entry through its parent field.
+     */
+    if ( list_empty(&bwc_watch->watchpt_read_list) &&
+         list_empty(&bwc_watch->watchpt_write_list) &&
+         list_empty(&bwc_watch->watchpt_access_list) &&
+         list_empty(&bwc_watch->watchpt_disabled_list) )
+    {
+        list_del(&bwc_watch->list);
+        kfree(bwc_watch);
+    }
+
+    pdb_bwc_print_list();
return rc;
}
@@ -312,16 +649,24 @@
switch (val)
{
case DIE_DEBUG:
- if (pdb_debug_fn(args->regs, args->trapnr, args->err))
+ if ( pdb_debug_fn(args->regs, args->trapnr, args->err) )
return NOTIFY_STOP;
break;
case DIE_TRAP:
- if (args->trapnr == 3 && pdb_int3_fn(args->regs, args->err))
+ if ( args->trapnr == 3 && pdb_int3_fn(args->regs, args->err) )
return NOTIFY_STOP;
break;
case DIE_INT3: /* without kprobes, we should never see
DIE_INT3 */
+ if ( pdb_int3_fn(args->regs, args->err) )
+ return NOTIFY_STOP;
+ break;
+ case DIE_PAGE_FAULT:
+ if ( pdb_page_fault_fn(args->regs, args->trapnr, args->err) )
+ return NOTIFY_STOP;
+ break;
case DIE_GPF:
- case DIE_PAGE_FAULT:
+ printk("---------------GPF\n");
+ break;
default:
break;
}
@@ -330,70 +675,110 @@
}
-int
+static int
pdb_debug_fn (struct pt_regs *regs, long error_code,
unsigned int condition)
{
pdb_response_t resp;
bwcpoint_p bkpt = NULL;
-
struct list_head *entry;
- list_for_each(entry, &bwcpoint_list.list)
+
+ printk("pdb_debug_fn\n");
+
+ list_for_each(entry, &bwcpoint_list)
{
bkpt = list_entry(entry, bwcpoint_t, list);
if ( current->pid == bkpt->process &&
- bkpt->type == BWC_DEBUG )
+ (bkpt->type == BWC_DEBUG || /* single step */
+ bkpt->type == BWC_WATCH_STEP)) /* single step over watchpoint */
break;
}
- if (bkpt == &bwcpoint_list || bkpt == NULL)
+ if (entry == &bwcpoint_list)
{
printk("not my debug 0x%x 0x%lx\n", current->pid, regs->eip);
return 0;
}
- list_del(&bkpt->list);
-
pdb_suspend(current);
- printk("(pdb) debug pid: %d, eip: 0x%08lx\n", current->pid, regs->eip);
+ printk("(pdb) %s pid: %d, eip: 0x%08lx\n",
+ bkpt->type == BWC_DEBUG ? "debug" : "watch-step",
+ current->pid, regs->eip);
regs->eflags &= ~X86_EFLAGS_TF;
set_tsk_thread_flag(current, TIF_SINGLESTEP);
- resp.operation = PDB_OPCODE_STEP;
+ switch (bkpt->type)
+ {
+ case BWC_DEBUG:
+ resp.operation = PDB_OPCODE_STEP;
+ break;
+ case BWC_WATCH_STEP:
+ {
+ struct list_head *watchpoint_list;
+ bwcpoint_p watch_page = bkpt->watchpoint->parent;
+
+ switch (bkpt->watchpoint->type)
+ {
+ case BWC_WATCH_READ:
+ watchpoint_list = &watch_page->watchpt_read_list; break;
+ case BWC_WATCH_WRITE:
+ watchpoint_list = &watch_page->watchpt_write_list; break;
+ case BWC_WATCH_ACCESS:
+ watchpoint_list = &watch_page->watchpt_access_list; break;
+ default:
+ printk("unknown type %d\n", bkpt->watchpoint->type); return 0;
+ }
+
+ resp.operation = PDB_OPCODE_WATCHPOINT;
+ list_del_init(&bkpt->watchpoint->list);
+ list_add_tail(&bkpt->watchpoint->list, watchpoint_list);
+ pdb_change_pte(current, bkpt->watchpoint, PDB_PTE_UPDATE);
+ pdb_bwc_print_list();
+ break;
+ }
+ default:
+ printk("unknown breakpoint type %d %d\n", __LINE__, bkpt->type);
+ return 0;
+ }
+
resp.process = current->pid;
resp.status = PDB_RESPONSE_OKAY;
pdb_send_response(&resp);
+ list_del(&bkpt->list);
+ kfree(bkpt);
+
return 1;
}
-int
+static int
pdb_int3_fn (struct pt_regs *regs, long error_code)
{
pdb_response_t resp;
bwcpoint_p bkpt = NULL;
+ unsigned long address = regs->eip - 1;
struct list_head *entry;
- list_for_each(entry, &bwcpoint_list.list)
+ list_for_each(entry, &bwcpoint_list)
{
bkpt = list_entry(entry, bwcpoint_t, list);
if ( current->pid == bkpt->process &&
- regs->eip == bkpt->address &&
+ address == bkpt->address &&
bkpt->type == BWC_INT3 )
break;
}
- if (bkpt == &bwcpoint_list || bkpt == NULL)
- {
- printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, regs->eip);
+ if (entry == &bwcpoint_list)
+ {
+ printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, address);
return 0;
}
- printk("(pdb) int3 pid: %d, eip: 0x%08lx\n", current->pid, regs->eip);
+ printk("(pdb) int3 pid: %d, eip: 0x%08lx\n", current->pid, address);
pdb_suspend(current);
@@ -405,6 +790,54 @@
return 1;
}
+
+/*
+ * Page fault hook: decide whether the fault was caused by one of our
+ * watchpoints and, if so, arrange to single-step over the access.
+ *
+ * The faulting address is read from cr2 and looked up with
+ * pdb_search_watchpoint() for the current process.  If no watchpoint
+ * covers it, 0 is returned ("not mine").  Otherwise the watchpoint is
+ * moved to its page's disabled list, the original pte is restored, the
+ * trap flag is set in regs->eflags, and a BWC_WATCH_STEP entry pointing at
+ * the watchpoint is pushed on the head of bwcpoint_list.  Returns 1 in
+ * that case.
+ *
+ * NOTE(review): the notifier in this file calls this function with
+ * args->trapnr as error_code and args->err as condition; condition is not
+ * used here.
+ * NOTE(review): if the BWC_WATCH_STEP entry cannot be allocated, -1 is
+ * returned after the watchpoint has already been disabled and the trap
+ * flag set; nothing will re-enable it.  Confirm the intended recovery.
+ */
+static int
+pdb_page_fault_fn (struct pt_regs *regs, long error_code,
+                   unsigned int condition)
+{
+    unsigned long cr2;
+    bwcpoint_p bwc;
+    bwcpoint_p bkpt;
+
+    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
+
+    bwc = pdb_search_watchpoint(current->pid, cr2);
+    if ( !bwc )
+    {
+        return 0;                                              /* not mine */
+    }
+
+    printk("page_fault cr2:%08lx err:%lx eip:%08lx\n",
+           cr2, error_code, regs->eip);
+
+    /* disable the watchpoint */
+    list_del_init(&bwc->list);
+    list_add_tail(&bwc->list, &bwc->parent->watchpt_disabled_list);
+    pdb_change_pte(current, bwc, PDB_PTE_RESTORE);
+
+    /* single step the faulting instruction */
+    regs->eflags |= X86_EFLAGS_TF;
+
+    /* create a bwcpoint entry so we know what to do once we regain control */
+    _pdb_bwcpoint_alloc(bkpt);
+    if ( bkpt == NULL ) return -1;
+
+    bkpt->process = current->pid;
+    bkpt->address = 0;
+    bkpt->type = BWC_WATCH_STEP;
+    bkpt->watchpoint = bwc;
+
+    /* add to head so we see it first the next time we break */
+    list_add(&bkpt->list, &bwcpoint_list);
+
+    pdb_bwc_print_list();
+    return 1;
+}
+
+
/*
* Local variables:
diff -r 5f1ed597f107 -r 8799d14bef77
tools/debugger/pdb/linux-2.6-module/module.c
--- a/tools/debugger/pdb/linux-2.6-module/module.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-module/module.c Thu Aug 25 22:53:20 2005
@@ -98,6 +98,11 @@
printk("(linux) detach 0x%x\n", request->process);
resp.status = PDB_RESPONSE_OKAY;
break;
+ case PDB_OPCODE_RD_REG :
+ resp.u.rd_reg.reg = request->u.rd_reg.reg;
+ pdb_read_register(target, &resp.u.rd_reg);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
case PDB_OPCODE_RD_REGS :
pdb_read_registers(target, &resp.u.rd_regs);
resp.status = PDB_RESPONSE_OKAY;
@@ -108,14 +113,16 @@
break;
case PDB_OPCODE_RD_MEM :
pdb_access_memory(target, request->u.rd_mem.address,
- &resp.u.rd_mem.data, request->u.rd_mem.length, 0);
+ &resp.u.rd_mem.data, request->u.rd_mem.length,
+ PDB_MEM_READ);
resp.u.rd_mem.address = request->u.rd_mem.address;
resp.u.rd_mem.length = request->u.rd_mem.length;
resp.status = PDB_RESPONSE_OKAY;
break;
case PDB_OPCODE_WR_MEM :
pdb_access_memory(target, request->u.wr_mem.address,
- &request->u.wr_mem.data, request->u.wr_mem.length, 1);
+ &request->u.wr_mem.data, request->u.wr_mem.length,
+ PDB_MEM_WRITE);
resp.status = PDB_RESPONSE_OKAY;
break;
case PDB_OPCODE_CONTINUE :
@@ -137,6 +144,14 @@
request->u.bkpt.length);
resp.status = PDB_RESPONSE_OKAY;
break;
+ case PDB_OPCODE_SET_WATCHPT :
+ pdb_insert_watchpoint(target, &request->u.watchpt);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
+ case PDB_OPCODE_CLR_WATCHPT :
+ pdb_remove_watchpoint(target, &request->u.watchpt);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
default:
printk("(pdb) unknown request operation %d\n", request->operation);
resp.status = PDB_RESPONSE_ERROR;
@@ -184,7 +199,7 @@
}
static void
-pdb_send_connection_status(int status, memory_t ring)
+pdb_send_connection_status(int status, unsigned long ring)
{
ctrl_msg_t cmsg =
{
@@ -248,8 +263,6 @@
pdb_sring_t *sring;
printk("----\npdb initialize %s %s\n", __DATE__, __TIME__);
-
- pdb_initialize_bwcpoint();
/*
if ( xen_start_info.flags & SIF_INITDOMAIN )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/linux-2.6-module/pdb_debug.h
--- a/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Thu Aug 25 22:53:20 2005
@@ -6,6 +6,7 @@
void pdb_initialize_bwcpoint (void);
int pdb_suspend (struct task_struct *target);
int pdb_resume (struct task_struct *target);
+int pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op);
int pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op);
int pdb_write_register (struct task_struct *target, pdb_op_wr_reg_p op);
int pdb_read_memory (struct task_struct *target, pdb_op_rd_mem_req_p req,
@@ -17,16 +18,16 @@
int pdb_step (struct task_struct *target);
int pdb_insert_memory_breakpoint (struct task_struct *target,
- memory_t address, u32 length);
+ unsigned long address, u32 length);
int pdb_remove_memory_breakpoint (struct task_struct *target,
- memory_t address, u32 length);
+ unsigned long address, u32 length);
+int pdb_insert_watchpoint (struct task_struct *target,
+ pdb_op_watchpt_p watchpt);
+int pdb_remove_watchpoint (struct task_struct *target,
+ pdb_op_watchpt_p watchpt);
int pdb_exceptions_notify (struct notifier_block *self, unsigned long val,
void *data);
-
-int pdb_debug_fn (struct pt_regs *regs, long error_code,
- unsigned int condition);
-int pdb_int3_fn (struct pt_regs *regs, long error_code);
/* module.c */
void pdb_send_response (pdb_response_t *response);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/linux-2.6-module/pdb_module.h
--- a/tools/debugger/pdb/linux-2.6-module/pdb_module.h Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-module/pdb_module.h Thu Aug 25 22:53:20 2005
@@ -14,20 +14,27 @@
#define PDB_OPCODE_DETACH 3
-#define PDB_OPCODE_RD_REGS 4
+#define PDB_OPCODE_RD_REG 4
+typedef struct pdb_op_rd_reg
+{
+ u32 reg;
+ u32 value;
+} pdb_op_rd_reg_t, *pdb_op_rd_reg_p;
+
+#define PDB_OPCODE_RD_REGS 5
typedef struct pdb_op_rd_regs
{
u32 reg[GDB_REGISTER_FRAME_SIZE];
} pdb_op_rd_regs_t, *pdb_op_rd_regs_p;
-#define PDB_OPCODE_WR_REG 5
+#define PDB_OPCODE_WR_REG 6
typedef struct pdb_op_wr_reg
{
u32 reg;
u32 value;
} pdb_op_wr_reg_t, *pdb_op_wr_reg_p;
-#define PDB_OPCODE_RD_MEM 6
+#define PDB_OPCODE_RD_MEM 7
typedef struct pdb_op_rd_mem_req
{
u32 address;
@@ -41,7 +48,7 @@
u8 data[1024];
} pdb_op_rd_mem_resp_t, *pdb_op_rd_mem_resp_p;
-#define PDB_OPCODE_WR_MEM 7
+#define PDB_OPCODE_WR_MEM 8
typedef struct pdb_op_wr_mem
{
u32 address;
@@ -49,16 +56,33 @@
u8 data[1024]; /* arbitrary */
} pdb_op_wr_mem_t, *pdb_op_wr_mem_p;
-#define PDB_OPCODE_CONTINUE 8
-#define PDB_OPCODE_STEP 9
+#define PDB_OPCODE_CONTINUE 9
+#define PDB_OPCODE_STEP 10
-#define PDB_OPCODE_SET_BKPT 10
-#define PDB_OPCODE_CLR_BKPT 11
+#define PDB_OPCODE_SET_BKPT 11
+#define PDB_OPCODE_CLR_BKPT 12
typedef struct pdb_op_bkpt
{
u32 address;
u32 length;
} pdb_op_bkpt_t, *pdb_op_bkpt_p;
+
+#define PDB_OPCODE_SET_WATCHPT 13
+#define PDB_OPCODE_CLR_WATCHPT 14
+#define PDB_OPCODE_WATCHPOINT 15
+typedef struct pdb_op_watchpt
+{
+#define BWC_DEBUG 1
+#define BWC_INT3 3
+#define BWC_WATCH 100 /* pdb: watchpoint page */
+#define BWC_WATCH_STEP 101 /* pdb: watchpoint single step */
+#define BWC_WATCH_WRITE 102
+#define BWC_WATCH_READ 103
+#define BWC_WATCH_ACCESS 104
+ u32 type;
+ u32 address;
+ u32 length;
+} pdb_op_watchpt_t, *pdb_op_watchpt_p;
typedef struct
@@ -68,10 +92,12 @@
union
{
pdb_op_attach_t attach;
+ pdb_op_rd_reg_t rd_reg;
pdb_op_wr_reg_t wr_reg;
pdb_op_rd_mem_req_t rd_mem;
pdb_op_wr_mem_t wr_mem;
pdb_op_bkpt_t bkpt;
+ pdb_op_watchpt_t watchpt;
} u;
} pdb_request_t, *pdb_request_p;
@@ -87,6 +113,7 @@
s16 status; /* PDB_RESPONSE_??? */
union
{
+ pdb_op_rd_reg_t rd_reg;
pdb_op_rd_regs_t rd_regs;
pdb_op_rd_mem_resp_t rd_mem;
} u;
@@ -94,6 +121,11 @@
DEFINE_RING_TYPES(pdb, pdb_request_t, pdb_response_t);
+
+
+/* from access_process_vm */
+#define PDB_MEM_READ 0
+#define PDB_MEM_WRITE 1
#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch
--- a/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch Thu Aug 25 22:53:20 2005
@@ -1,7 +1,15 @@
diff -u linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c
--- linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c 2005-07-31 22:36:50.000000000 +0100
+++ linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c 2005-08-01 10:57:31.000000000 +0100
-@@ -172,6 +172,7 @@
+@@ -151,6 +151,7 @@
+ /* TLB flushing */
+ EXPORT_SYMBOL(flush_tlb_page);
+ #endif
++EXPORT_SYMBOL(flush_tlb_mm);
+
+ #ifdef CONFIG_X86_IO_APIC
+ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+@@ -172,6 +173,7 @@
EXPORT_SYMBOL_GPL(unset_nmi_callback);
EXPORT_SYMBOL(register_die_notifier);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_domain.c
--- a/tools/debugger/pdb/pdb_caml_domain.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_domain.c Thu Aug 25 22:53:20 2005
@@ -6,7 +6,7 @@
* PDB's OCaml interface library for debugging domains
*/
-#include <xc.h>
+#include <xenctrl.h>
#include <xendebug.h>
#include <errno.h>
#include <stdio.h>
@@ -41,6 +41,54 @@
/****************************************************************************/
+
+/*
+ * dom_read_register : context_t -> int -> int32
+ * Returns the value of one register (a GDB_* index) of the domain/vcpu
+ * named by context; raises Failure if the registers cannot be read.
+ */
+value
+dom_read_register (value context, value reg)
+{
+ CAMLparam2(context, reg);
+ CAMLlocal1(result);
+
+ int my_reg = Int_val(reg);
+ cpu_user_regs_t *regs;
+ context_t ctx;
+
+ decode_context(&ctx, context);
+
+ /* a non-zero return means failure; on success regs is set for us */
+ if ( xendebug_read_registers(xc_handle, ctx.domain, ctx.vcpu, &regs) )
+ {
+ printf("(pdb) read registers error!\n"); fflush(stdout);
+ failwith("read registers error");
+ }
+
+ dump_regs(regs);
+
+ /* unknown register numbers yield a well-typed int32 zero */
+ result = caml_copy_int32(0);
+
+ switch (my_reg)
+ {
+ case GDB_EAX: result = caml_copy_int32(regs->eax); break;
+ case GDB_ECX: result = caml_copy_int32(regs->ecx); break;
+ case GDB_EDX: result = caml_copy_int32(regs->edx); break;
+ case GDB_EBX: result = caml_copy_int32(regs->ebx); break;
+ case GDB_ESP: result = caml_copy_int32(regs->esp); break;
+ case GDB_EBP: result = caml_copy_int32(regs->ebp); break;
+ case GDB_ESI: result = caml_copy_int32(regs->esi); break;
+ case GDB_EDI: result = caml_copy_int32(regs->edi); break;
+ case GDB_EIP: result = caml_copy_int32(regs->eip); break;
+ case GDB_EFL: result = caml_copy_int32(regs->eflags); break;
+ case GDB_CS: result = caml_copy_int32(regs->cs); break;
+ case GDB_SS: result = caml_copy_int32(regs->ss); break;
+ case GDB_DS: result = caml_copy_int32(regs->ds); break;
+ case GDB_ES: result = caml_copy_int32(regs->es); break;
+ case GDB_FS: result = caml_copy_int32(regs->fs); break;
+ case GDB_GS: result = caml_copy_int32(regs->gs); break;
+ }
+
+ CAMLreturn(result);
+}
/*
* dom_read_registers : context_t -> int32
@@ -155,7 +203,7 @@
context_t ctx;
int loop;
char *buffer;
- memory_t my_address = Int32_val(address);
+ unsigned long my_address = Int32_val(address);
u32 my_length = Int_val(length);
printf ("(pdb) read memory\n");
@@ -211,7 +259,7 @@
context_t ctx;
char buffer[4096]; /* a big buffer */
- memory_t my_address;
+ unsigned long my_address;
u32 length = 0;
printf ("(pdb) write memory\n");
@@ -231,7 +279,7 @@
}
buffer[length++] = Int_val(Field(node, 0));
- my_address = (memory_t) Int32_val(address);
+ my_address = (unsigned long) Int32_val(address);
if ( xendebug_write_memory(xc_handle, ctx.domain, ctx.vcpu,
my_address, length, buffer) )
@@ -296,7 +344,7 @@
CAMLparam3(context, address, length);
context_t ctx;
- memory_t my_address = (memory_t) Int32_val(address);
+ unsigned long my_address = (unsigned long) Int32_val(address);
int my_length = Int_val(length);
decode_context(&ctx, context);
@@ -325,7 +373,7 @@
context_t ctx;
- memory_t my_address = (memory_t) Int32_val(address);
+ unsigned long my_address = (unsigned long) Int32_val(address);
int my_length = Int_val(length);
printf ("(pdb) remove memory breakpoint 0x%lx %d\n",
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_evtchn.c
--- a/tools/debugger/pdb/pdb_caml_evtchn.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_evtchn.c Thu Aug 25 22:53:20 2005
@@ -6,7 +6,7 @@
* PDB's OCaml interface library for event channels
*/
-#include <xc.h>
+#include <xenctrl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_process.c
--- a/tools/debugger/pdb/pdb_caml_process.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_process.c Thu Aug 25 22:53:20 2005
@@ -15,7 +15,7 @@
#include <caml/memory.h>
#include <caml/mlvalues.h>
-#include <xc.h>
+#include <xenctrl.h>
#include <xen/xen.h>
#include <xen/io/domain_controller.h>
#include <xen/linux/privcmd.h>
@@ -113,6 +113,12 @@
case PDB_OPCODE_DETACH :
break;
+ case PDB_OPCODE_RD_REG :
+ {
+ sprintf(&msg[0], "%08x", _flip(resp->u.rd_reg.value));
+ break;
+ }
+
case PDB_OPCODE_RD_REGS :
{
int loop;
@@ -161,16 +167,22 @@
}
case PDB_OPCODE_SET_BKPT :
- {
- break;
- }
case PDB_OPCODE_CLR_BKPT :
- {
+ case PDB_OPCODE_SET_WATCHPT :
+ case PDB_OPCODE_CLR_WATCHPT :
+ {
+ break;
+ }
+
+ case PDB_OPCODE_WATCHPOINT :
+ {
+ sprintf(msg, "S05");
break;
}
default :
- printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE\n");
+ printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE %d\n",
+ resp->operation);
break;
}
@@ -258,6 +270,32 @@
CAMLreturn(Val_unit);
}
+
+
+/*
+ * proc_read_register : context_t -> int -> unit
+ * Sends a PDB_OPCODE_RD_REG request for register reg of the process named
+ * by context; the register value itself is not returned here.
+ */
+value
+proc_read_register (value context, value reg)
+{
+ CAMLparam2(context, reg); /* register every value parameter */
+
+ pdb_request_t req;
+ context_t ctx;
+ int my_reg = Int_val(reg);
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_RD_REG;
+ req.process = ctx.process;
+ req.u.rd_reg.reg = my_reg;
+ req.u.rd_reg.value = 0;
+
+ send_request (ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
+}
+
/*
@@ -443,7 +481,7 @@
/*
- * proc_insert_memory_breakpoint : context_t -> int32 -> int list -> unit
+ * proc_insert_memory_breakpoint : context_t -> int32 -> int -> unit
*/
value
proc_insert_memory_breakpoint (value context, value address, value length)
@@ -457,7 +495,7 @@
req.operation = PDB_OPCODE_SET_BKPT;
req.process = ctx.process;
- req.u.bkpt.address = (memory_t) Int32_val(address);
+ req.u.bkpt.address = (unsigned long) Int32_val(address);
req.u.bkpt.length = Int_val(length);
send_request(ctx.ring, ctx.evtchn, &req);
@@ -466,7 +504,7 @@
}
/*
- * proc_remove_memory_breakpoint : context_t -> int32 -> int list -> unit
+ * proc_remove_memory_breakpoint : context_t -> int32 -> int -> unit
*/
value
proc_remove_memory_breakpoint (value context, value address, value length)
@@ -480,8 +518,56 @@
req.operation = PDB_OPCODE_CLR_BKPT;
req.process = ctx.process;
- req.u.bkpt.address = (memory_t) Int32_val(address);
+ req.u.bkpt.address = (unsigned long) Int32_val(address);
req.u.bkpt.length = Int_val(length);
+
+ send_request(ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * proc_insert_watchpoint : context_t -> bwcpoint_t -> int32 -> int -> unit
+ * Sends a PDB_OPCODE_SET_WATCHPT request of the given kind covering
+ * length bytes at address in the process named by context.
+ */
+value
+proc_insert_watchpoint (value context, value kind, value address, value length)
+{
+ CAMLparam4(context, kind, address, length); /* all four value parameters */
+
+ context_t ctx;
+ pdb_request_t req;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_SET_WATCHPT;
+ req.process = ctx.process;
+ req.u.watchpt.type = Int_val(kind);
+ req.u.watchpt.address = (unsigned long) Int32_val(address);
+ req.u.watchpt.length = Int_val(length);
+
+ send_request(ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * proc_remove_watchpoint : context_t -> bwcpoint_t -> int32 -> int -> unit
+ * Builds a PDB_OPCODE_CLR_WATCHPT request of the given kind covering
+ * length bytes at address in the process named by context.
+ */
+value
+proc_remove_watchpoint (value context, value kind, value address, value length)
+{
+ CAMLparam4(context, kind, address, length); /* all four value parameters */
+
+ context_t ctx;
+ pdb_request_t req;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_CLR_WATCHPT;
+ req.process = ctx.process;
+ req.u.watchpt.type = Int_val(kind);
+ req.u.watchpt.address = (unsigned long) Int32_val(address);
+ req.u.watchpt.length = Int_val(length);
send_request(ctx.ring, ctx.evtchn, &req);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_xc.c
--- a/tools/debugger/pdb/pdb_caml_xc.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_xc.c Thu Aug 25 22:53:20 2005
@@ -6,7 +6,7 @@
* PDB's OCaml interface library for debugging domains
*/
-#include <xc.h>
+#include <xenctrl.h>
#include <xendebug.h>
#include <errno.h>
#include <stdio.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_xcs.c
--- a/tools/debugger/pdb/pdb_caml_xcs.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_xcs.c Thu Aug 25 22:53:20 2005
@@ -17,7 +17,7 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
-#include <xc.h>
+#include <xenctrl.h>
#include <xen/xen.h>
#include <xen/io/domain_controller.h>
@@ -50,7 +50,7 @@
{
CAMLparam2(domain, ring);
int my_domain = Int_val(domain);
- memory_t my_ring = Int32_val(ring);
+ unsigned long my_ring = Int32_val(ring);
pdb_front_ring_t *front_ring;
pdb_sring_t *sring;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_xen.c
--- a/tools/debugger/pdb/pdb_xen.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_xen.c Thu Aug 25 22:53:20 2005
@@ -7,7 +7,7 @@
* PDB interface library for accessing Xen
*/
-#include <xc.h>
+#include <xenctrl.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/readme
--- a/tools/debugger/pdb/readme Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/readme Thu Aug 25 22:53:20 2005
@@ -1,9 +1,9 @@
-PDB 0.3
+PDB 0.3.3
http://www.cl.cam.ac.uk/netos/pdb
Alex Ho
-June 2005
+August 2005
This is the latest incarnation of the pervasive debugger.
@@ -79,6 +79,11 @@
Process
PDB can also debug a process running in a Linux 2.6 domain.
+ You will need to patch the Linux 2.6 domain U tree to export some
+ additional symbols for the pdb module:
+
+ % make -C linux-2.6-patches
+
After running PDB in domain 0, insert the pdb module in dom u:
% insmod linux-2.6-module/pdb.ko
@@ -87,7 +92,14 @@
(gdb) maint packet x context = process <domid> <pid>
+ Read, write, and access watchpoints should also work for processes;
+ use the "rwatch", "watch" and "awatch" gdb commands respectively.
+
+ If you are having trouble with GDB 5.3 (i386-redhat-linux-gnu),
+ try GDB 6.3 (configured with --target=i386-linux-gnu).
+
+
To Do
-- watchpoints
+- watchpoints for domains
- support for SMP
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/Makefile
--- a/tools/examples/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/examples/Makefile Thu Aug 25 22:53:20 2005
@@ -16,7 +16,7 @@
# Xen script dir and scripts to go there.
XEN_SCRIPT_DIR = /etc/xen/scripts
-XEN_SCRIPTS = network vif-bridge
+XEN_SCRIPTS = network-bridge vif-bridge
XEN_SCRIPTS += network-route vif-route
XEN_SCRIPTS += block-file
XEN_SCRIPTS += block-enbd
@@ -24,10 +24,14 @@
XEN_BOOT_DIR = /usr/lib/xen/boot
XEN_BOOT = mem-map.sxp
+XEN_HOTPLUG_DIR = /etc/hotplug.d/xen-backend
+XEN_HOTPLUG_SCRIPTS = backend.hotplug
+
all:
build:
-install: all install-initd install-configs install-scripts install-boot
+install: all install-initd install-configs install-scripts install-boot \
+ install-hotplug
install-initd:
[ -d $(DESTDIR)/etc/init.d ] || $(INSTALL_DIR) $(DESTDIR)/etc/init.d
@@ -60,4 +64,12 @@
$(INSTALL_PROG) $$i $(DESTDIR)$(XEN_BOOT_DIR); \
done
+# Install the hotplug scripts, never overwriting one that already exists.
+install-hotplug:
+ [ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \
+ $(INSTALL_DIR) $(DESTDIR)$(XEN_HOTPLUG_DIR)
+ for i in $(XEN_HOTPLUG_SCRIPTS); \
+ do [ -e $(DESTDIR)$(XEN_HOTPLUG_DIR)/$$i ] || \
+ $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \
+ done
+
clean:
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/README
--- a/tools/examples/README Wed Aug 24 02:43:18 2005
+++ b/tools/examples/README Thu Aug 25 22:53:20 2005
@@ -9,9 +9,20 @@
send it (preferably with a little summary to go in this file) to
<xen-devel@xxxxxxxxxxxxxxxxxxxxx> so we can add it to this directory.
+block-enbd - binds/unbinds network block devices
+block-file - binds/unbinds file to loopback device
+mem-map.sxp - memory map xend configuration file.
network - default network setup script called by xend at startup.
+network-route - default xen network start/stop script.
+network-nat - default xen network start/stop script when using NAT.
vif-bridge - default virtual network interface setup script.
+vif-route - default xen virtual network start/stop script
+vif-nat - configures vif in routed-nat mode.
xend-config.sxp - default xend configuration file.
xmexample1 - example configuration script for 'xm create'.
xmexample2 - a more complex configuration script for 'xm create'.
+xmexample3 - an advanced configuration script for 'xm create'
+ that utilizes the vmid.
+xmexample.vmx - a configuration script for creating a vmx domain with
+ 'xm create'.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/vif-bridge
--- a/tools/examples/vif-bridge Wed Aug 24 02:43:18 2005
+++ b/tools/examples/vif-bridge Thu Aug 25 22:53:20 2005
@@ -74,8 +74,10 @@
exit
fi
-# Add/remove vif to/from bridge.
-brctl ${brcmd} ${bridge} ${vif}
+# Add vif to bridge. vifs are auto-removed from bridge.
+# ("=" is the portable string comparison for [ ]; "==" is a bash extension.)
+if [ "${brcmd}" = "addif" ] ; then
+ brctl ${brcmd} ${bridge} ${vif}
+fi
ifconfig ${vif} $OP
if [ ${ip} ] ; then
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp Wed Aug 24 02:43:18 2005
+++ b/tools/examples/xend-config.sxp Thu Aug 25 22:53:20 2005
@@ -28,7 +28,7 @@
## Use the following if VIF traffic is bridged.
# The script used to start/stop networking for xend.
-(network-script network)
+(network-script network-bridge)
# The default bridge that virtual interfaces should be connected to.
(vif-bridge xen-br0)
# The default script used to control virtual interfaces.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx Wed Aug 24 02:43:18 2005
+++ b/tools/examples/xmexample.vmx Thu Aug 25 22:53:20 2005
@@ -10,13 +10,8 @@
# Kernel image file.
kernel = "/usr/lib/xen/boot/vmxloader"
-# Optional ramdisk.
-#ramdisk = "/boot/initrd.gz"
-
-# The domain build function. Default is 'linux'.
+# The domain build function. VMX domain uses 'vmx'.
builder='vmx'
-#builder='linux'
-#builder='netbsd'
# Initial memory allocation (in megabytes) for the new domain.
memory = 128
@@ -26,13 +21,6 @@
# Which CPU to start domain on?
#cpu = -1 # leave to Xen to pick
-
-#----------------------------------------------------------------------------
-# Define network interfaces.
-
-# Number of network interfaces. Default is 1.
-#nics=1
-nics=0
# Optionally define mac and/or bridge for the network interfaces.
# Random MACs are assigned if not given.
@@ -46,37 +34,7 @@
# and MODE is r for read-only, w for read-write.
#disk = [ 'phy:hda1,hda1,r' ]
-disk = [ 'file:/var/images/min-el3-i386.img,hda,w' ]
-
-#----------------------------------------------------------------------------
-# Set the kernel command line for the new domain.
-# You only need to define the IP parameters and hostname if the domain's
-# IP config doesn't, e.g. in ifcfg-eth0 or via DHCP.
-# You can use 'extra' to set the runlevel and custom environment
-# variables used by custom rc scripts (e.g. VMID=, usr= ).
-
-# Set if you want dhcp to allocate the IP address.
-#dhcp="dhcp"
-# Set netmask.
-#netmask=
-# Set default gateway.
-#gateway=
-# Set the hostname.
-#hostname= "vm%d" % vmid
-
-# Set root device.
-#root = "/dev/ram0"
-root = "/dev/hda1 ro"
-
-# Root device for nfs.
-#root = "/dev/nfs"
-# The nfs server.
-#nfs_server = '169.254.1.0'
-# Root directory on the nfs server.
-#nfs_root = '/full/path/to/root/directory'
-
-# Sets runlevel 4.
-#extra = "acpi=off console=ttyS0 console=tty0 1"
+disk = [ 'file:/var/images/min-el3-i386.img,ioemu:hda,w' ]
#----------------------------------------------------------------------------
# Set according to whether you want the domain restarted when it exits.
@@ -125,15 +83,10 @@
#-----------------------------------------------------------------------------
-# set the real time clock to local time [default=utc]
-#localtime='utc'
+# set the real time clock to local time [default=0 i.e. set to utc]
+#localtime=1
#-----------------------------------------------------------------------------
# start in full screen
#full-screen=1
-
-#-----------------------------------------------------------------------------
-# set the mac address of the first interface
-#macaddr=
-
diff -r 5f1ed597f107 -r 8799d14bef77 tools/firmware/acpi/acpi2_0.h
--- a/tools/firmware/acpi/acpi2_0.h Wed Aug 24 02:43:18 2005
+++ b/tools/firmware/acpi/acpi2_0.h Thu Aug 25 22:53:20 2005
@@ -18,7 +18,7 @@
#ifndef _ACPI_2_0_H_
#define _ACPI_2_0_H_
-#include "xc.h" // for u8, u16, u32, u64 definition
+#include "xenctrl.h" // for u8, u16, u32, u64 definition
#pragma pack (1)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/hw/i8254.c
--- a/tools/ioemu/hw/i8254.c Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/hw/i8254.c Thu Aug 25 22:53:20 2005
@@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include "vl.h"
-#include "xc.h"
+#include "xenctrl.h"
#include <io/ioreq.h>
//#define DEBUG_PIT
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/hw/i8259.c
--- a/tools/ioemu/hw/i8259.c Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/hw/i8259.c Thu Aug 25 22:53:20 2005
@@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include "vl.h"
-#include "xc.h"
+#include "xenctrl.h"
#include <io/ioreq.h>
/* debug PIC */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/hw/ide.c
--- a/tools/ioemu/hw/ide.c Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/hw/ide.c Thu Aug 25 22:53:20 2005
@@ -430,6 +430,7 @@
put_le16(p + 59, 0x100 | s->mult_sectors);
put_le16(p + 60, s->nb_sectors);
put_le16(p + 61, s->nb_sectors >> 16);
+ put_le16(p + 63, 0x07);
put_le16(p + 80, (1 << 1) | (1 << 2));
put_le16(p + 82, (1 << 14));
put_le16(p + 83, (1 << 14));
@@ -460,7 +461,7 @@
put_le16(p + 48, 1); /* dword I/O (XXX: should not be set on CDROM) */
put_le16(p + 49, 1 << 9); /* LBA supported, no DMA */
put_le16(p + 53, 3); /* words 64-70, 54-58 valid */
- put_le16(p + 63, 0x103); /* DMA modes XXX: may be incorrect */
+ put_le16(p + 63, 0x07); /* Multi-word DMA mode 2 */
put_le16(p + 64, 1); /* PIO modes */
put_le16(p + 65, 0xb4); /* minimum DMA multiword tx cycle time */
put_le16(p + 66, 0xb4); /* recommended DMA multiword tx cycle time */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/hw/ioapic.h
--- a/tools/ioemu/hw/ioapic.h Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/hw/ioapic.h Thu Aug 25 22:53:20 2005
@@ -26,7 +26,7 @@
#ifndef __IOAPIC_H
#define __IOAPIC_H
-#include "xc.h"
+#include "xenctrl.h"
#include <io/ioreq.h>
#include <io/vmx_vlapic.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/monitor.c
--- a/tools/ioemu/monitor.c Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/monitor.c Thu Aug 25 22:53:20 2005
@@ -225,14 +225,10 @@
}
}
+extern void destroy_vmx_domain(void);
static void do_quit(void)
{
- extern int domid;
- extern FILE* logfile;
- char destroy_cmd[20];
- sprintf(destroy_cmd, "xm destroy %d", domid);
- if (system(destroy_cmd) == -1)
- fprintf(logfile, "%s failed.!\n", destroy_cmd);
+ destroy_vmx_domain();
exit(0);
}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/target-i386-dm/Makefile
--- a/tools/ioemu/target-i386-dm/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/target-i386-dm/Makefile Thu Aug 25 22:53:20 2005
@@ -188,7 +188,7 @@
#########################################################
DEFINES+=-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DAPIC_SUPPORT
-LIBS+=-lm -L../../libxc -lxc
+LIBS+=-lm -L../../libxc -lxenctrl
ifndef CONFIG_USER_ONLY
LIBS+=-lz
endif
@@ -376,10 +376,10 @@
$(CC) $(DEFINES) -c -o $@ $<
clean:
- rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp qemu-vgaram-bin
+ rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp
distclean:
- rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp qemu-vgaram-bin
+ rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp
install: all
if [ ! -d $(INSTALL_DIR) ];then mkdir -p $(INSTALL_DIR);fi
@@ -387,8 +387,6 @@
install -m 755 -s $(PROGS) "$(INSTALL_DIR)"
install -m 755 qemu-dm.debug "$(INSTALL_DIR)"
install -m 755 qemu-ifup "$(DESTDIR)$(configdir)"
- gunzip -c qemu-vgaram-bin.gz >qemu-vgaram-bin
- install -m 755 qemu-vgaram-bin "$(DESTDIR)$(configdir)"
ifneq ($(wildcard .depend),)
include .depend
endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/target-i386-dm/helper2.c Thu Aug 25 22:53:20 2005
@@ -47,7 +47,7 @@
#include <fcntl.h>
#include <sys/ioctl.h>
-#include "xc.h"
+#include "xenctrl.h"
#include <io/ioreq.h>
#include "cpu.h"
@@ -55,6 +55,7 @@
#include "vl.h"
shared_iopage_t *shared_page = NULL;
+extern int reset_requested;
CPUX86State *cpu_86_init(void)
{
@@ -327,7 +328,16 @@
env->send_event = 1;
}
-//static unsigned long tsc_per_tick = 1; /* XXX: calibrate */
+/* Destroy this device model's domain by running "xm destroy <domid>". */
+void
+destroy_vmx_domain(void)
+{
+ extern int domid;
+ extern FILE* logfile;
+ char destroy_cmd[32]; /* room for "xm destroy " + any int + NUL */
+ snprintf(destroy_cmd, sizeof(destroy_cmd), "xm destroy %d", domid);
+ if (system(destroy_cmd) == -1)
+ fprintf(logfile, "%s failed.!\n", destroy_cmd);
+}
int main_loop(void)
{
@@ -348,6 +358,10 @@
if (vm_running) {
if (shutdown_requested) {
break;
+ }
+ if (reset_requested){
+ qemu_system_reset();
+ reset_requested = 0;
}
}
@@ -391,7 +405,21 @@
}
}
}
+ destroy_vmx_domain();
return 0;
+}
+
+/*
+ * Reset handler (registered with qemu_register_reset): pauses the domain,
+ * then runs "xm shutdown -R <domid>". The argument is not used.
+ */
+static void
+qemu_vmx_reset(void *unused)
+{
+ char cmd[255];
+ extern int domid;
+
+ /* pause domain first, to avoid repeated reboot request*/
+ xc_domain_pause (xc_handle, domid);
+
+ sprintf(cmd,"xm shutdown -R %d", domid);
+ system (cmd); /* exit status is not checked */
}
CPUState *
@@ -400,7 +428,7 @@
CPUX86State *env;
cpu_exec_init();
-
+ qemu_register_reset(qemu_vmx_reset, NULL);
env = malloc(sizeof(CPUX86State));
if (!env)
return NULL;
@@ -427,3 +455,4 @@
return env;
}
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/vl.c Thu Aug 25 22:53:20 2005
@@ -72,7 +72,7 @@
#endif
#endif /* CONFIG_SDL */
-#include "xc.h"
+#include "xenctrl.h"
#include "exec-all.h"
//#define DO_TB_FLUSH
@@ -2030,7 +2030,7 @@
} QEMUResetEntry;
static QEMUResetEntry *first_reset_entry;
-static int reset_requested;
+int reset_requested;
int shutdown_requested;
void qemu_register_reset(QEMUResetHandler *func, void *opaque)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/vl.h Thu Aug 25 22:53:20 2005
@@ -107,6 +107,7 @@
void qemu_register_reset(QEMUResetHandler *func, void *opaque);
void qemu_system_reset_request(void);
+void qemu_system_reset(void);
void qemu_system_shutdown_request(void);
void main_loop_wait(int timeout);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/Makefile
--- a/tools/libxc/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/Makefile Thu Aug 25 22:53:20 2005
@@ -12,28 +12,32 @@
XEN_ROOT = ../..
include $(XEN_ROOT)/tools/Rules.mk
-SRCS :=
-SRCS += xc_sedf.c
-SRCS += xc_bvtsched.c
-SRCS += xc_core.c
-SRCS += xc_domain.c
-SRCS += xc_evtchn.c
-SRCS += xc_gnttab.c
-SRCS += xc_load_bin.c
-SRCS += xc_load_elf.c
-SRCS += xc_linux_build.c
-SRCS += xc_misc.c
-SRCS += xc_physdev.c
-SRCS += xc_private.c
+SRCS :=
+BUILD_SRCS :=
+SRCS += xc_bvtsched.c
+SRCS += xc_core.c
+SRCS += xc_domain.c
+SRCS += xc_evtchn.c
+SRCS += xc_gnttab.c
+SRCS += xc_misc.c
+SRCS += xc_physdev.c
+SRCS += xc_private.c
+SRCS += xc_sedf.c
+
ifeq ($(XEN_TARGET_ARCH),ia64)
-SRCS += xc_ia64_stubs.c
+BUILD_SRCS += xc_ia64_stubs.c
else
-SRCS += xc_load_aout9.c
-SRCS += xc_linux_restore.c
-SRCS += xc_linux_save.c
-SRCS += xc_vmx_build.c
-SRCS += xc_ptrace.c
-SRCS += xc_ptrace_core.c
+SRCS += xc_ptrace.c
+SRCS += xc_ptrace_core.c
+
+BUILD_SRCS := xc_load_aout9.c
+BUILD_SRCS += xc_load_bin.c
+BUILD_SRCS += xc_load_elf.c
+BUILD_SRCS += xc_linux_build.c
+BUILD_SRCS += xc_linux_restore.c
+BUILD_SRCS += xc_linux_save.c
+BUILD_SRCS += xc_vmx_build.c
+BUILD_SRCS += xg_private.c
endif
CFLAGS += -Wall
@@ -43,13 +47,20 @@
CFLAGS += $(INCLUDES) -I.
# Get gcc to generate the dependencies for us.
CFLAGS += -Wp,-MD,.$(@F).d
+LDFLAGS += -L.
DEPS = .*.d
LIB_OBJS := $(patsubst %.c,%.o,$(SRCS))
PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS))
-LIB := libxc.a libxc-pic.a
-LIB += libxc.so libxc.so.$(MAJOR) libxc.so.$(MAJOR).$(MINOR)
+LIB_BUILD_OBJS := $(patsubst %.c,%.o,$(BUILD_SRCS))
+PIC_BUILD_OBJS := $(patsubst %.c,%.opic,$(BUILD_SRCS))
+
+LIB := libxenctrl.a
+LIB += libxenctrl.so libxenctrl.so.$(MAJOR) libxenctrl.so.$(MAJOR).$(MINOR)
+
+LIB += libxenguest.a
+LIB += libxenguest.so libxenguest.so.$(MAJOR) libxenguest.so.$(MAJOR).$(MINOR)
all: build
build: check-for-zlib mk-symlinks
@@ -77,11 +88,16 @@
install: build
[ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
[ -d $(DESTDIR)/usr/include ] || $(INSTALL_DIR) $(DESTDIR)/usr/include
- $(INSTALL_PROG) libxc.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
- $(INSTALL_DATA) libxc.a $(DESTDIR)/usr/$(LIBDIR)
- ln -sf libxc.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxc.so.$(MAJOR)
- ln -sf libxc.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxc.so
- $(INSTALL_DATA) xc.h $(DESTDIR)/usr/include
+ $(INSTALL_PROG) libxenctrl.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxenctrl.a $(DESTDIR)/usr/$(LIBDIR)
+ ln -sf libxenctrl.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenctrl.so.$(MAJOR)
+ ln -sf libxenctrl.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenctrl.so
+ $(INSTALL_DATA) xenctrl.h $(DESTDIR)/usr/include
+
+ $(INSTALL_PROG) libxenguest.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxenguest.a $(DESTDIR)/usr/$(LIBDIR)
+ ln -sf libxenguest.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenguest.so.$(MAJOR)
+ ln -sf libxenguest.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenguest.so
.PHONY: TAGS clean rpm install all
@@ -100,18 +116,30 @@
mv staging/i386/*.rpm .
rm -rf staging
-libxc.a: $(LIB_OBJS)
+# libxenctrl
+
+libxenctrl.a: $(LIB_OBJS)
$(AR) rc $@ $^
-libxc-pic.a: $(PIC_OBJS)
+libxenctrl.so: libxenctrl.so.$(MAJOR)
+ ln -sf $< $@
+libxenctrl.so.$(MAJOR): libxenctrl.so.$(MAJOR).$(MINOR)
+ ln -sf $< $@
+
+libxenctrl.so.$(MAJOR).$(MINOR): $(PIC_OBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenctrl.so.$(MAJOR) -shared -o $@ $^
+
+# libxenguest
+
+libxenguest.a: $(LIB_BUILD_OBJS)
$(AR) rc $@ $^
-libxc.so: libxc.so.$(MAJOR)
+libxenguest.so: libxenguest.so.$(MAJOR)
ln -sf $< $@
-libxc.so.$(MAJOR): libxc.so.$(MAJOR).$(MINOR)
+libxenguest.so.$(MAJOR): libxenguest.so.$(MAJOR).$(MINOR)
ln -sf $< $@
-libxc.so.$(MAJOR).$(MINOR): $(PIC_OBJS)
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxc.so.$(MAJOR) -shared -o $@ $^ -lz
+# Links with -lxenctrl (found via -L.), so libxenctrl.so must exist first.
+libxenguest.so.$(MAJOR).$(MINOR): $(PIC_BUILD_OBJS) libxenctrl.so
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenguest.so.$(MAJOR) -shared -o $@ $(PIC_BUILD_OBJS) -lz -lxenctrl
-include $(DEPS)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/linux_boot_params.h
--- a/tools/libxc/linux_boot_params.h Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/linux_boot_params.h Thu Aug 25 22:53:20 2005
@@ -17,6 +17,7 @@
#define E820_NVS 4
#define E820_IO 16
#define E820_SHARED 17
+#define E820_XENSTORE 18
u32 caching_attr; /* used by hypervisor */
#define MEMMAP_UC 0
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_core.c Thu Aug 25 22:53:20 2005
@@ -1,4 +1,4 @@
-#include "xc_private.h"
+#include "xg_private.h"
#define ELFSIZE 32
#include "xc_elf.h"
#include <stdlib.h>
@@ -43,7 +43,7 @@
goto error_out;
}
- if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == 0) {
+ if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL) {
PERROR("Could not allocate dump_mem");
goto error_out;
}
@@ -108,9 +108,8 @@
free(dump_mem_start);
return 0;
error_out:
- if (dump_fd)
+ if (dump_fd != -1)
close(dump_fd);
- if (dump_mem_start)
- free(dump_mem_start);
+ free(dump_mem_start);
return -1;
}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_domain.c Thu Aug 25 22:53:20 2005
@@ -266,7 +266,7 @@
int err;
unsigned int npages = mem_kb / (PAGE_SIZE/1024);
- err = do_dom_mem_op(xc_handle, MEMOP_increase_reservation, NULL,
+ err = xc_dom_mem_op(xc_handle, MEMOP_increase_reservation, NULL,
npages, 0, domid);
if (err == npages)
return 0;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_gnttab.c
--- a/tools/libxc/xc_gnttab.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_gnttab.c Thu Aug 25 22:53:20 2005
@@ -40,17 +40,17 @@
int xc_gnttab_map_grant_ref(int xc_handle,
- memory_t host_virt_addr,
+ u64 host_virt_addr,
u32 dom,
u16 ref,
u16 flags,
s16 *handle,
- memory_t *dev_bus_addr)
+ u64 *dev_bus_addr)
{
struct gnttab_map_grant_ref op;
int rc;
- op.host_virt_addr = host_virt_addr;
+ op.host_addr = host_virt_addr;
op.dom = (domid_t)dom;
op.ref = ref;
op.flags = flags;
@@ -67,15 +67,15 @@
int xc_gnttab_unmap_grant_ref(int xc_handle,
- memory_t host_virt_addr,
- memory_t dev_bus_addr,
+ u64 host_virt_addr,
+ u64 dev_bus_addr,
u16 handle,
s16 *status)
{
struct gnttab_unmap_grant_ref op;
int rc;
- op.host_virt_addr = host_virt_addr;
+ op.host_addr = host_virt_addr;
op.dev_bus_addr = dev_bus_addr;
op.handle = handle;
@@ -92,7 +92,7 @@
u32 dom,
u16 nr_frames,
s16 *status,
- memory_t **frame_list)
+ unsigned long **frame_list)
{
struct gnttab_setup_table op;
int rc, i;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_linux_build.c Thu Aug 25 22:53:20 2005
@@ -2,7 +2,8 @@
* xc_linux_build.c
*/
-#include "xc_private.h"
+#include "xg_private.h"
+#include <xenctrl.h>
#if defined(__i386__)
#define ELFSIZE 32
@@ -318,8 +319,7 @@
return 0;
error_out:
- if ( page_array != NULL )
- free(page_array);
+ free(page_array);
return -1;
}
#else /* x86 */
@@ -341,7 +341,7 @@
unsigned long count, i;
start_info_t *start_info;
shared_info_t *shared_info;
- mmu_t *mmu = NULL;
+ xc_mmu_t *mmu = NULL;
int rc;
unsigned long nr_pt_pages;
@@ -491,7 +491,7 @@
}
}
- if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+ if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
goto error_out;
/* setup page tables */
@@ -521,9 +521,9 @@
page_array[physmap_pfn++]);
for ( count = 0; count < nr_pages; count++ )
{
- if ( add_mmu_update(xc_handle, mmu,
- (page_array[count] << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE, count) )
+ if ( xc_add_mmu_update(xc_handle, mmu,
+ (page_array[count] << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE, count) )
{
munmap(physmap, PAGE_SIZE);
goto error_out;
@@ -603,7 +603,7 @@
munmap(shared_info, PAGE_SIZE);
/* Send the page update requests down to the hypervisor. */
- if ( finish_mmu_updates(xc_handle, mmu) )
+ if ( xc_finish_mmu_updates(xc_handle, mmu) )
goto error_out;
free(mmu);
@@ -616,10 +616,8 @@
return 0;
error_out:
- if ( mmu != NULL )
- free(mmu);
- if ( page_array != NULL )
- free(page_array);
+ free(mmu);
+ free(page_array);
return -1;
}
#endif
@@ -679,7 +677,7 @@
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
- if ( (do_dom0_op(xc_handle, &op) < 0) ||
+ if ( (xc_dom0_op(xc_handle, &op) < 0) ||
((u16)op.u.getdomaininfo.domain != domid) )
{
PERROR("Could not get info on domain");
@@ -719,8 +717,7 @@
close(initrd_fd);
if ( initrd_gfd )
gzclose(initrd_gfd);
- if ( image != NULL )
- free(image);
+ free(image);
#ifdef __ia64__
/* based on new_thread in xen/arch/ia64/domain.c */
@@ -797,7 +794,7 @@
launch_op.u.setdomaininfo.ctxt = ctxt;
launch_op.cmd = DOM0_SETDOMAININFO;
- rc = do_dom0_op(xc_handle, &launch_op);
+ rc = xc_dom0_op(xc_handle, &launch_op);
return rc;
@@ -806,8 +803,7 @@
gzclose(initrd_gfd);
else if ( initrd_fd >= 0 )
close(initrd_fd);
- if ( image != NULL )
- free(image);
+ free(image);
return -1;
}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_linux_restore.c Thu Aug 25 22:53:20 2005
@@ -6,7 +6,12 @@
* Copyright (c) 2003, K A Fraser.
*/
-#include "xc_private.h"
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "xg_private.h"
+#include <xenctrl.h>
+
#include <xen/linux/suspend.h>
#define MAX_BATCH_SIZE 1024
@@ -32,7 +37,7 @@
#define PPRINTF(_f, _a...)
#endif
-ssize_t
+static ssize_t
read_exact(int fd, void *buf, size_t count)
{
int r = 0, s;
@@ -48,7 +53,8 @@
return r;
}
-int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns)
+int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns,
+ unsigned int store_evtchn, unsigned long *store_mfn)
{
dom0_op_t op;
int rc = 1, i, n, k;
@@ -88,7 +94,7 @@
char *region_base;
- mmu_t *mmu = NULL;
+ xc_mmu_t *mmu = NULL;
/* used by debug verify code */
unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
@@ -131,7 +137,7 @@
/* Get the domain's shared-info frame. */
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)dom;
- if (do_dom0_op(xc_handle, &op) < 0) {
+ if (xc_dom0_op(xc_handle, &op) < 0) {
ERR("Could not get information on new domain");
goto out;
}
@@ -157,7 +163,7 @@
goto out;
}
- mmu = init_mmu_updates(xc_handle, dom);
+ mmu = xc_init_mmu_updates(xc_handle, dom);
if (mmu == NULL) {
ERR("Could not initialise for MMU updates");
goto out;
@@ -354,8 +360,9 @@
}
}
- if ( add_mmu_update(xc_handle, mmu,
- (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
+ if ( xc_add_mmu_update(xc_handle, mmu,
+ (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+ pfn) )
{
printf("machpys mfn=%ld pfn=%ld\n",mfn,pfn);
goto out;
@@ -369,7 +376,7 @@
DPRINTF("Received all pages\n");
- if ( finish_mmu_updates(xc_handle, mmu) )
+ if ( xc_finish_mmu_updates(xc_handle, mmu) )
goto out;
/*
@@ -387,14 +394,14 @@
pin[nr_pins].mfn = pfn_to_mfn_table[i];
if ( ++nr_pins == MAX_PIN_BATCH )
{
- if ( do_mmuext_op(xc_handle, pin, nr_pins, dom) < 0 )
+ if ( xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0 )
goto out;
nr_pins = 0;
}
}
if ( (nr_pins != 0) &&
- (do_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) )
+ (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) )
goto out;
DPRINTF("\b\b\b\b100%%\n");
@@ -434,7 +441,7 @@
if ( count > 0 )
{
- if ( (rc = do_dom_mem_op( xc_handle,
+ if ( (rc = xc_dom_mem_op( xc_handle,
MEMOP_decrease_reservation,
pfntab, count, 0, dom )) <0 )
{
@@ -464,10 +471,13 @@
}
ctxt.user_regs.esi = mfn = pfn_to_mfn_table[pfn];
p_srec = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_WRITE, mfn);
+ xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
p_srec->resume_info.nr_pages = nr_pfns;
p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT;
p_srec->resume_info.flags = 0;
+ *store_mfn = p_srec->resume_info.store_mfn =
+ pfn_to_mfn_table[p_srec->resume_info.store_mfn];
+ p_srec->resume_info.store_evtchn = store_evtchn;
munmap(p_srec, PAGE_SIZE);
/* Uncanonicalise each GDT frame number. */
@@ -582,7 +592,7 @@
op.u.setdomaininfo.domain = (domid_t)dom;
op.u.setdomaininfo.vcpu = 0;
op.u.setdomaininfo.ctxt = &ctxt;
- rc = do_dom0_op(xc_handle, &op);
+ rc = xc_dom0_op(xc_handle, &op);
if ( rc != 0 )
{
@@ -593,7 +603,7 @@
DPRINTF("Domain ready to be unpaused\n");
op.cmd = DOM0_UNPAUSEDOMAIN;
op.u.unpausedomain.domain = (domid_t)dom;
- rc = do_dom0_op(xc_handle, &op);
+ rc = xc_dom0_op(xc_handle, &op);
if (rc == 0) {
/* Success: print the domain id. */
DPRINTF("DOM=%u\n", dom);
@@ -603,12 +613,9 @@
out:
if ( (rc != 0) && (dom != 0) )
xc_domain_destroy(xc_handle, dom);
- if ( mmu != NULL )
- free(mmu);
- if ( pfn_to_mfn_table != NULL )
- free(pfn_to_mfn_table);
- if ( pfn_type != NULL )
- free(pfn_type);
+ free(mmu);
+ free(pfn_to_mfn_table);
+ free(pfn_type);
DPRINTF("Restore exit with rc=%d\n", rc);
return rc;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_linux_save.c Thu Aug 25 22:53:20 2005
@@ -7,11 +7,15 @@
*/
#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
#include <sys/time.h>
-#include "xc_private.h"
+
+#include "xg_private.h"
+
#include <xen/linux/suspend.h>
#include <xen/io/domain_controller.h>
-#include <time.h>
#define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */
@@ -20,7 +24,7 @@
#define DEBUG 0
#if 1
-#define ERR(_f, _a...) fprintf ( stderr, _f , ## _a )
+/* Print a formatted error to stderr and flush it straight away.  Wrapped in
+ * do { } while (0) so the macro expands to a single statement. */
+#define ERR(_f, _a...) do { fprintf(stderr, _f , ## _a); fflush(stderr); } while (0)
#else
#define ERR(_f, _a...) ((void)0)
#endif
@@ -136,7 +140,7 @@
return (new->tv_sec * 1000000) + new->tv_usec;
}
-static long long llgettimeofday()
+static long long llgettimeofday( void )
{
struct timeval now;
gettimeofday(&now, NULL);
@@ -312,9 +316,9 @@
}
-int suspend_and_state(int xc_handle, int io_fd, int dom,
- xc_dominfo_t *info,
- vcpu_guest_context_t *ctxt)
+static int suspend_and_state(int xc_handle, int io_fd, int dom,
+ xc_dominfo_t *info,
+ vcpu_guest_context_t *ctxt)
{
int i=0;
char ans[30];
@@ -429,7 +433,7 @@
- that should be sent this iteration (unless later marked as skip);
- to skip this iteration because already dirty;
- to fixup by sending at the end if not already resent; */
- unsigned long *to_send, *to_skip, *to_fix;
+ unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
xc_shadow_control_stats_t stats;
@@ -643,6 +647,22 @@
goto out;
}
+ /* Map the suspend-record MFN to pin it. The page must be owned by
+ dom for this to succeed. */
+ p_srec = xc_map_foreign_range(xc_handle, dom,
+ sizeof(*p_srec), PROT_READ | PROT_WRITE,
+ ctxt.user_regs.esi);
+ if (!p_srec){
+ ERR("Couldn't map suspend record");
+ goto out;
+ }
+
+ /* Canonicalize store mfn. */
+ if ( !translate_mfn_to_pfn(&p_srec->resume_info.store_mfn) ) {
+ ERR("Store frame is not in range of pseudophys map");
+ goto out;
+ }
+
print_stats( xc_handle, dom, 0, &stats, 0 );
/* Now write out each data page, canonicalising page tables as we go... */
@@ -756,7 +776,7 @@
goto out;
}
- if ( get_pfn_type_batch(xc_handle, dom, batch, pfn_type) ){
+ if ( xc_get_pfn_type_batch(xc_handle, dom, batch, pfn_type) ){
ERR("get_pfn_type_batch failed");
goto out;
}
@@ -983,16 +1003,6 @@
}
}
- /* Map the suspend-record MFN to pin it. The page must be owned by
- dom for this to succeed. */
- p_srec = xc_map_foreign_range(xc_handle, dom,
- sizeof(*p_srec), PROT_READ,
- ctxt.user_regs.esi);
- if (!p_srec){
- ERR("Couldn't map suspend record");
- goto out;
- }
-
if (nr_pfns != p_srec->nr_pfns )
{
ERR("Suspend record nr_pfns unexpected (%ld != %ld)",
@@ -1045,8 +1055,11 @@
if(live_mfn_to_pfn_table)
munmap(live_mfn_to_pfn_table, PAGE_SIZE*1024);
- if (pfn_type != NULL)
- free(pfn_type);
+ free(pfn_type);
+ free(pfn_batch);
+ free(to_send);
+ free(to_fix);
+ free(to_skip);
DPRINTF("Save exit rc=%d\n",rc);
return !!rc;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_load_aout9.c
--- a/tools/libxc/xc_load_aout9.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_load_aout9.c Thu Aug 25 22:53:20 2005
@@ -1,5 +1,5 @@
-#include "xc_private.h"
+#include "xg_private.h"
#include "xc_aout9.h"
#if defined(__i386__)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_load_bin.c
--- a/tools/libxc/xc_load_bin.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_load_bin.c Thu Aug 25 22:53:20 2005
@@ -66,7 +66,7 @@
* Free Software Foundation, Inc.
*/
-#include "xc_private.h"
+#include "xg_private.h"
#include <stdlib.h>
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_load_elf.c Thu Aug 25 22:53:20 2005
@@ -2,7 +2,7 @@
* xc_elf_load.c
*/
-#include "xc_private.h"
+#include "xg_private.h"
#if defined(__i386__)
#define ELFSIZE 32
@@ -309,8 +309,7 @@
dsi->v_end = round_pgup(maxva);
out:
- if ( p != NULL )
- free(p);
+ free(p);
return 0;
}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_private.c Thu Aug 25 22:53:20 2005
@@ -64,8 +64,8 @@
/*******************/
/* NB: arr must be mlock'ed */
-int get_pfn_type_batch(int xc_handle,
- u32 dom, int num, unsigned long *arr)
+int xc_get_pfn_type_batch(int xc_handle,
+ u32 dom, int num, unsigned long *arr)
{
dom0_op_t op;
op.cmd = DOM0_GETPAGEFRAMEINFO2;
@@ -92,25 +92,40 @@
return op.u.getpageframeinfo.type;
}
-
-
-/*******************/
-
-int pin_table(
- int xc_handle, unsigned int type, unsigned long mfn, domid_t dom)
-{
- struct mmuext_op op;
-
- op.cmd = type;
- op.mfn = mfn;
-
- if ( do_mmuext_op(xc_handle, &op, 1, dom) < 0 )
- return 1;
-
- return 0;
-}
-
-static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
+/*
+ * Issue the __HYPERVISOR_mmuext_op hypercall for the nr_ops operations in
+ * the array at op, with dom passed as the fourth hypercall argument.
+ *
+ * The op array is mlock()ed for the duration of the hypercall and unlocked
+ * again afterwards.  Returns the hypercall's result (negative on failure),
+ * or -EINVAL if the array could not be locked.
+ */
+int xc_mmuext_op(
+    int xc_handle,
+    struct mmuext_op *op,
+    unsigned int nr_ops,
+    domid_t dom)
+{
+    privcmd_hypercall_t hypercall;
+    long ret = -EINVAL;   /* reported unchanged if mlock() fails */
+
+    hypercall.op     = __HYPERVISOR_mmuext_op;
+    hypercall.arg[0] = (unsigned long)op;
+    hypercall.arg[1] = (unsigned long)nr_ops;
+    hypercall.arg[2] = (unsigned long)0;
+    hypercall.arg[3] = (unsigned long)dom;
+
+    if ( mlock(op, nr_ops*sizeof(*op)) != 0 )
+    {
+        PERROR("Could not lock memory for Xen hypercall");
+        goto out1;
+    }
+
+    if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
+    {
+        /* Name the hypercall that actually failed (was "Dom_mem"). */
+        fprintf(stderr, "mmuext operation failed (rc=%ld errno=%d)-- need to"
+                " rebuild the user-space tool set?\n",ret,errno);
+    }
+
+    safe_munlock(op, nr_ops*sizeof(*op));
+
+ out1:
+    return ret;
+}
+
+static int flush_mmu_updates(int xc_handle, xc_mmu_t *mmu)
{
int err = 0;
privcmd_hypercall_t hypercall;
@@ -145,9 +160,9 @@
return err;
}
-mmu_t *init_mmu_updates(int xc_handle, domid_t dom)
-{
- mmu_t *mmu = malloc(sizeof(mmu_t));
+xc_mmu_t *xc_init_mmu_updates(int xc_handle, domid_t dom)
+{
+ xc_mmu_t *mmu = malloc(sizeof(xc_mmu_t));
if ( mmu == NULL )
return mmu;
mmu->idx = 0;
@@ -155,8 +170,8 @@
return mmu;
}
-int add_mmu_update(int xc_handle, mmu_t *mmu,
- unsigned long ptr, unsigned long val)
+int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu,
+ unsigned long ptr, unsigned long val)
{
mmu->updates[mmu->idx].ptr = ptr;
mmu->updates[mmu->idx].val = val;
@@ -167,10 +182,47 @@
return 0;
}
-int finish_mmu_updates(int xc_handle, mmu_t *mmu)
+/* Complete a batch of MMU updates: hands mmu to flush_mmu_updates() and
+ * returns its result unchanged. */
+int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu)
{
return flush_mmu_updates(xc_handle, mmu);
}
+
+/*
+ * Issue the __HYPERVISOR_dom_mem_op hypercall.
+ *
+ * memop, extent_list, nr_extents, extent_order and domid are passed through
+ * unchanged as hypercall arguments 0-4.  A non-NULL extent_list is mlock()ed
+ * around the hypercall and unlocked afterwards.  Returns the hypercall's
+ * result (negative on failure), or -EINVAL if extent_list could not be
+ * locked.
+ *
+ * NOTE(review): extent_list is declared "unsigned int *", but the length
+ * locked is nr_extents*sizeof(unsigned long).  The two sizes differ wherever
+ * long is wider than int -- confirm which element type callers and the
+ * hypervisor interface actually use.
+ */
+int xc_dom_mem_op(int xc_handle,
+ unsigned int memop,
+ unsigned int *extent_list,
+ unsigned int nr_extents,
+ unsigned int extent_order,
+ domid_t domid)
+{
+ privcmd_hypercall_t hypercall;
+ long ret = -EINVAL;
+
+ hypercall.op = __HYPERVISOR_dom_mem_op;
+ hypercall.arg[0] = (unsigned long)memop;
+ hypercall.arg[1] = (unsigned long)extent_list;
+ hypercall.arg[2] = (unsigned long)nr_extents;
+ hypercall.arg[3] = (unsigned long)extent_order;
+ hypercall.arg[4] = (unsigned long)domid;
+
+ /* A NULL extent_list is allowed; only a real list needs locking. */
+ if ( (extent_list != NULL) &&
+ (mlock(extent_list, nr_extents*sizeof(unsigned long)) != 0) )
+ {
+ PERROR("Could not lock memory for Xen hypercall");
+ goto out1;
+ }
+
+ if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
+ {
+ fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
+ " rebuild the user-space tool set?\n",ret,errno);
+ }
+
+ if ( extent_list != NULL )
+ safe_munlock(extent_list, nr_extents*sizeof(unsigned long));
+
+ out1:
+ return ret;
+}
long long xc_domain_get_cpu_usage( int xc_handle, domid_t domid, int vcpu )
@@ -189,19 +241,6 @@
return op.u.getvcpucontext.cpu_time;
}
-
-/* This is shared between save and restore, and may generally be useful. */
-unsigned long csum_page (void * page)
-{
- int i;
- unsigned long *p = page;
- unsigned long long sum=0;
-
- for ( i = 0; i < (PAGE_SIZE/sizeof(unsigned long)); i++ )
- sum += p[i];
-
- return sum ^ (sum>>32);
-}
unsigned long xc_get_m2p_start_mfn ( int xc_handle )
{
@@ -332,53 +371,6 @@
return sz;
}
-char *xc_read_kernel_image(const char *filename, unsigned long *size)
-{
- int kernel_fd = -1;
- gzFile kernel_gfd = NULL;
- char *image = NULL;
- unsigned int bytes;
-
- if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
- {
- PERROR("Could not open kernel image");
- goto out;
- }
-
- if ( (*size = xc_get_filesz(kernel_fd)) == 0 )
- {
- PERROR("Could not read kernel image");
- goto out;
- }
-
- if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
- {
- PERROR("Could not allocate decompression state for state file");
- goto out;
- }
-
- if ( (image = malloc(*size)) == NULL )
- {
- PERROR("Could not allocate memory for kernel image");
- goto out;
- }
-
- if ( (bytes = gzread(kernel_gfd, image, *size)) != *size )
- {
- PERROR("Error reading kernel image, could not"
- " read the whole image (%d != %ld).", bytes, *size);
- free(image);
- image = NULL;
- }
-
- out:
- if ( kernel_gfd != NULL )
- gzclose(kernel_gfd);
- else if ( kernel_fd >= 0 )
- close(kernel_fd);
- return image;
-}
-
void xc_map_memcpy(unsigned long dst, char *src, unsigned long size,
int xch, u32 dom, unsigned long *parray,
unsigned long vstart)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_private.h Thu Aug 25 22:53:20 2005
@@ -1,123 +1,25 @@
-#ifndef __XC_PRIVATE_H__
-#define __XC_PRIVATE_H__
+#ifndef XC_PRIVATE_H
+#define XC_PRIVATE_H
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
+#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <sys/ioctl.h>
-#include <errno.h>
-#include <string.h>
-#include "xc.h"
+#include "xenctrl.h"
#include <xen/linux/privcmd.h>
-#define _PAGE_PRESENT 0x001
-#define _PAGE_RW 0x002
-#define _PAGE_USER 0x004
-#define _PAGE_PWT 0x008
-#define _PAGE_PCD 0x010
-#define _PAGE_ACCESSED 0x020
-#define _PAGE_DIRTY 0x040
-#define _PAGE_PAT 0x080
-#define _PAGE_PSE 0x080
-#define _PAGE_GLOBAL 0x100
-
-#if defined(__i386__)
-#define L1_PAGETABLE_SHIFT 12
-#define L2_PAGETABLE_SHIFT 22
-#define L1_PAGETABLE_SHIFT_PAE 12
-#define L2_PAGETABLE_SHIFT_PAE 21
-#define L3_PAGETABLE_SHIFT_PAE 30
-#elif defined(__x86_64__)
-#define L1_PAGETABLE_SHIFT 12
-#define L2_PAGETABLE_SHIFT 21
-#define L3_PAGETABLE_SHIFT 30
-#define L4_PAGETABLE_SHIFT 39
-#endif
-
-#if defined(__i386__)
-#define ENTRIES_PER_L1_PAGETABLE 1024
-#define ENTRIES_PER_L2_PAGETABLE 1024
-#define L1_PAGETABLE_ENTRIES_PAE 512
-#define L2_PAGETABLE_ENTRIES_PAE 512
-#define L3_PAGETABLE_ENTRIES_PAE 4
-#elif defined(__x86_64__)
-#define L1_PAGETABLE_ENTRIES 512
-#define L2_PAGETABLE_ENTRIES 512
-#define L3_PAGETABLE_ENTRIES 512
-#define L4_PAGETABLE_ENTRIES 512
-#endif
-
#define PAGE_SHIFT XC_PAGE_SHIFT
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
-
-typedef u32 l1_pgentry_32_t;
-typedef u32 l2_pgentry_32_t;
-typedef u64 l1_pgentry_64_t;
-typedef u64 l2_pgentry_64_t;
-typedef u64 l3_pgentry_64_t;
-typedef unsigned long l1_pgentry_t;
-typedef unsigned long l2_pgentry_t;
-#if defined(__x86_64__)
-typedef unsigned long l3_pgentry_t;
-typedef unsigned long l4_pgentry_t;
-#endif
-
-#if defined(__i386__)
-#define l1_table_offset(_a) \
- (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
-#define l2_table_offset(_a) \
- ((_a) >> L2_PAGETABLE_SHIFT)
-#define l1_table_offset_pae(_a) \
- (((_a) >> L1_PAGETABLE_SHIFT_PAE) & (L1_PAGETABLE_ENTRIES_PAE - 1))
-#define l2_table_offset_pae(_a) \
- (((_a) >> L2_PAGETABLE_SHIFT_PAE) & (L2_PAGETABLE_ENTRIES_PAE - 1))
-#define l3_table_offset_pae(_a) \
- (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
-#elif defined(__x86_64__)
-#define l1_table_offset(_a) \
- (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
-#define l2_table_offset(_a) \
- (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
-#define l3_table_offset(_a) \
- (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
-#define l4_table_offset(_a) \
- (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
-#endif
-
-struct domain_setup_info
-{
- unsigned long v_start;
- unsigned long v_end;
- unsigned long v_kernstart;
- unsigned long v_kernend;
- unsigned long v_kernentry;
-
- unsigned int load_symtab;
- unsigned int pae_kernel;
- unsigned long symtab_addr;
- unsigned long symtab_len;
-};
-
-typedef int (*parseimagefunc)(char *image, unsigned long image_size,
- struct domain_setup_info *dsi);
-typedef int (*loadimagefunc)(char *image, unsigned long image_size, int xch,
- u32 dom, unsigned long *parray,
- struct domain_setup_info *dsi);
-
-struct load_funcs
-{
- parseimagefunc parseimage;
- loadimagefunc loadimage;
-};
#define ERROR(_m, _a...) \
do { \
@@ -186,97 +88,6 @@
return ret;
}
-static inline int do_dom_mem_op(int xc_handle,
- unsigned int memop,
- unsigned int *extent_list,
- unsigned int nr_extents,
- unsigned int extent_order,
- domid_t domid)
-{
- privcmd_hypercall_t hypercall;
- long ret = -EINVAL;
-
- hypercall.op = __HYPERVISOR_dom_mem_op;
- hypercall.arg[0] = (unsigned long)memop;
- hypercall.arg[1] = (unsigned long)extent_list;
- hypercall.arg[2] = (unsigned long)nr_extents;
- hypercall.arg[3] = (unsigned long)extent_order;
- hypercall.arg[4] = (unsigned long)domid;
-
- if ( (extent_list != NULL) &&
- (mlock(extent_list, nr_extents*sizeof(unsigned long)) != 0) )
- {
- PERROR("Could not lock memory for Xen hypercall");
- goto out1;
- }
-
- if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
- {
- fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
- " rebuild the user-space tool set?\n",ret,errno);
- }
-
- if ( extent_list != NULL )
- safe_munlock(extent_list, nr_extents*sizeof(unsigned long));
-
- out1:
- return ret;
-}
-
-static inline int do_mmuext_op(
- int xc_handle,
- struct mmuext_op *op,
- unsigned int nr_ops,
- domid_t dom)
-{
- privcmd_hypercall_t hypercall;
- long ret = -EINVAL;
-
- hypercall.op = __HYPERVISOR_mmuext_op;
- hypercall.arg[0] = (unsigned long)op;
- hypercall.arg[1] = (unsigned long)nr_ops;
- hypercall.arg[2] = (unsigned long)0;
- hypercall.arg[3] = (unsigned long)dom;
-
- if ( mlock(op, nr_ops*sizeof(*op)) != 0 )
- {
- PERROR("Could not lock memory for Xen hypercall");
- goto out1;
- }
-
- if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
- {
- fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
- " rebuild the user-space tool set?\n",ret,errno);
- }
-
- safe_munlock(op, nr_ops*sizeof(*op));
-
- out1:
- return ret;
-}
-
-
-/*
- * PFN mapping.
- */
-int get_pfn_type_batch(int xc_handle, u32 dom, int num, unsigned long *arr);
-unsigned long csum_page (void * page);
-
-/*
- * MMU updates.
- */
-#define MAX_MMU_UPDATES 1024
-typedef struct {
- mmu_update_t updates[MAX_MMU_UPDATES];
- int idx;
- domid_t subject;
-} mmu_t;
-mmu_t *init_mmu_updates(int xc_handle, domid_t dom);
-int add_mmu_update(int xc_handle, mmu_t *mmu,
- unsigned long ptr, unsigned long val);
-int finish_mmu_updates(int xc_handle, mmu_t *mmu);
-
/*
* ioctl-based mfn mapping interface
@@ -296,38 +107,4 @@
} privcmd_mmap_t;
*/
-#define mfn_mapper_queue_size 128
-
-typedef struct mfn_mapper {
- int xc_handle;
- int size;
- int prot;
- int error;
- int max_queue_size;
- void * addr;
- privcmd_mmap_t ioctl;
-
-} mfn_mapper_t;
-
-unsigned long xc_get_m2p_start_mfn (int xc_handle);
-
-int xc_copy_to_domain_page(int xc_handle, u32 domid,
- unsigned long dst_pfn, void *src_page);
-
-unsigned long xc_get_filesz(int fd);
-
-char *xc_read_kernel_image(const char *filename, unsigned long *size);
-
-void xc_map_memcpy(unsigned long dst, char *src, unsigned long size,
- int xch, u32 dom, unsigned long *parray,
- unsigned long vstart);
-
-int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
- domid_t dom);
-
-/* image loading */
-int probe_elf(char *image, unsigned long image_size, struct load_funcs *funcs);
-int probe_bin(char *image, unsigned long image_size, struct load_funcs *funcs);
-int probe_aout9(char *image, unsigned long image_size, struct load_funcs *funcs);
-
-#endif /* __XC_PRIVATE_H__ */
+#endif /* XC_PRIVATE_H */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_ptrace.c Thu Aug 25 22:53:20 2005
@@ -221,7 +221,7 @@
return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
error_out:
- return 0;
+ return NULL;
}
int
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_vmx_build.c Thu Aug 25 22:53:20 2005
@@ -3,7 +3,7 @@
*/
#include <stddef.h>
-#include "xc_private.h"
+#include "xg_private.h"
#define ELFSIZE 32
#include "xc_elf.h"
#include <stdlib.h>
@@ -37,58 +37,70 @@
int nr_map = 0;
/* XXX: Doesn't work for > 4GB yet */
- mem_mapp->map[0].addr = 0x0;
- mem_mapp->map[0].size = 0x9F800;
- mem_mapp->map[0].type = E820_RAM;
- mem_mapp->map[0].caching_attr = MEMMAP_WB;
+ mem_mapp->map[nr_map].addr = 0x0;
+ mem_mapp->map[nr_map].size = 0x9F800;
+ mem_mapp->map[nr_map].type = E820_RAM;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
nr_map++;
- mem_mapp->map[1].addr = 0x9F800;
- mem_mapp->map[1].size = 0x800;
- mem_mapp->map[1].type = E820_RESERVED;
- mem_mapp->map[1].caching_attr = MEMMAP_UC;
+ mem_mapp->map[nr_map].addr = 0x9F800;
+ mem_mapp->map[nr_map].size = 0x800;
+ mem_mapp->map[nr_map].type = E820_RESERVED;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
nr_map++;
- mem_mapp->map[2].addr = 0xA0000;
- mem_mapp->map[2].size = 0x20000;
- mem_mapp->map[2].type = E820_IO;
- mem_mapp->map[2].caching_attr = MEMMAP_UC;
+ mem_mapp->map[nr_map].addr = 0xA0000;
+ mem_mapp->map[nr_map].size = 0x20000;
+ mem_mapp->map[nr_map].type = E820_IO;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
nr_map++;
- mem_mapp->map[3].addr = 0xF0000;
- mem_mapp->map[3].size = 0x10000;
- mem_mapp->map[3].type = E820_RESERVED;
- mem_mapp->map[3].caching_attr = MEMMAP_UC;
+ mem_mapp->map[nr_map].addr = 0xF0000;
+ mem_mapp->map[nr_map].size = 0x10000;
+ mem_mapp->map[nr_map].type = E820_RESERVED;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
nr_map++;
- mem_mapp->map[4].addr = 0x100000;
- mem_mapp->map[4].size = mem_size - 0x100000 - PAGE_SIZE;
- mem_mapp->map[4].type = E820_RAM;
- mem_mapp->map[4].caching_attr = MEMMAP_WB;
+#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */
+ /* Most of the ram goes here */
+ mem_mapp->map[nr_map].addr = 0x100000;
+ mem_mapp->map[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
+ mem_mapp->map[nr_map].type = E820_RAM;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
nr_map++;
- mem_mapp->map[5].addr = mem_size - PAGE_SIZE;
- mem_mapp->map[5].size = PAGE_SIZE;
- mem_mapp->map[5].type = E820_SHARED;
- mem_mapp->map[5].caching_attr = MEMMAP_WB;
+ /* Statically allocated special pages */
+
+ /* Shared ioreq_t page */
+ mem_mapp->map[nr_map].addr = mem_size - PAGE_SIZE;
+ mem_mapp->map[nr_map].size = PAGE_SIZE;
+ mem_mapp->map[nr_map].type = E820_SHARED;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
nr_map++;
- mem_mapp->map[6].addr = mem_size;
- mem_mapp->map[6].size = 0x3 * PAGE_SIZE;
- mem_mapp->map[6].type = E820_NVS;
- mem_mapp->map[6].caching_attr = MEMMAP_UC;
+ /* For xenstore */
+ mem_mapp->map[nr_map].addr = mem_size - 2*PAGE_SIZE;
+ mem_mapp->map[nr_map].size = PAGE_SIZE;
+ mem_mapp->map[nr_map].type = E820_XENSTORE;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
nr_map++;
- mem_mapp->map[7].addr = mem_size + 0x3 * PAGE_SIZE;
- mem_mapp->map[7].size = 0xA * PAGE_SIZE;
- mem_mapp->map[7].type = E820_ACPI;
- mem_mapp->map[7].caching_attr = MEMMAP_WB;
+ mem_mapp->map[nr_map].addr = mem_size;
+ mem_mapp->map[nr_map].size = 0x3 * PAGE_SIZE;
+ mem_mapp->map[nr_map].type = E820_NVS;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
nr_map++;
- mem_mapp->map[8].addr = 0xFEC00000;
- mem_mapp->map[8].size = 0x1400000;
- mem_mapp->map[8].type = E820_IO;
- mem_mapp->map[8].caching_attr = MEMMAP_UC;
+ mem_mapp->map[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
+ mem_mapp->map[nr_map].size = 0xA * PAGE_SIZE;
+ mem_mapp->map[nr_map].type = E820_ACPI;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
+ nr_map++;
+
+ mem_mapp->map[nr_map].addr = 0xFEC00000;
+ mem_mapp->map[nr_map].size = 0x1400000;
+ mem_mapp->map[nr_map].type = E820_IO;
+ mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
nr_map++;
mem_mapp->nr_map = nr_map;
@@ -212,7 +224,11 @@
unsigned long shared_info_frame,
unsigned int control_evtchn,
unsigned long flags,
- struct mem_map * mem_mapp)
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn,
+ struct mem_map *mem_mapp
+ )
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -227,7 +243,7 @@
shared_info_t *shared_info;
struct linux_boot_params * boot_paramsp;
__u16 * boot_gdtp;
- mmu_t *mmu = NULL;
+ xc_mmu_t *mmu = NULL;
int rc;
unsigned long nr_pt_pages;
@@ -342,7 +358,7 @@
}
}
- if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+ if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
goto error_out;
#ifdef __i386__
@@ -443,9 +459,9 @@
/* Write the machine->phys table entries. */
for ( count = 0; count < nr_pages; count++ )
{
- if ( add_mmu_update(xc_handle, mmu,
- (page_array[count] << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE, count) )
+ if ( xc_add_mmu_update(xc_handle, mmu,
+ (page_array[count] << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE, count) )
goto error_out;
}
@@ -510,7 +526,10 @@
boot_paramsp->drive_info.dummy[14] = 32;
/* memsize is in megabytes */
+ /* If you need to create a special e820map, comment this line
+ and use mem-map.sxp */
build_e820map(mem_mapp, memsize << 20);
+ *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
#if defined (__i386__)
if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
#else
@@ -568,7 +587,7 @@
#endif
/* Send the page update requests down to the hypervisor. */
- if ( finish_mmu_updates(xc_handle, mmu) )
+ if ( xc_finish_mmu_updates(xc_handle, mmu) )
goto error_out;
free(mmu);
@@ -597,17 +616,15 @@
return 0;
error_out:
- if ( mmu != NULL )
- free(mmu);
- if ( page_array != NULL )
- free(page_array);
+ free(mmu);
+ free(page_array);
return -1;
}
#define VMX_FEATURE_FLAG 0x20
-int vmx_identify(void)
+static int vmx_identify(void)
{
int eax, ecx;
@@ -637,7 +654,10 @@
const char *ramdisk_name,
const char *cmdline,
unsigned int control_evtchn,
- unsigned long flags)
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn)
{
dom0_op_t launch_op, op;
int initrd_fd = -1;
@@ -688,7 +708,7 @@
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
- if ( (do_dom0_op(xc_handle, &op) < 0) ||
+ if ( (xc_dom0_op(xc_handle, &op) < 0) ||
((u16)op.u.getdomaininfo.domain != domid) )
{
PERROR("Could not get info on domain");
@@ -712,7 +732,8 @@
initrd_gfd, initrd_size, nr_pages,
ctxt, cmdline,
op.u.getdomaininfo.shared_info_frame,
- control_evtchn, flags, mem_mapp) < 0 )
+ control_evtchn, flags, vcpus, store_evtchn, store_mfn,
+ mem_mapp) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
@@ -722,8 +743,7 @@
close(initrd_fd);
if ( initrd_gfd )
gzclose(initrd_gfd);
- if ( image != NULL )
- free(image);
+ free(image);
ctxt->flags = VGCF_VMX_GUEST;
/* FPU is set up to default initial state. */
@@ -769,7 +789,7 @@
launch_op.u.setdomaininfo.ctxt = ctxt;
launch_op.cmd = DOM0_SETDOMAININFO;
- rc = do_dom0_op(xc_handle, &launch_op);
+ rc = xc_dom0_op(xc_handle, &launch_op);
return rc;
@@ -778,8 +798,7 @@
gzclose(initrd_gfd);
else if ( initrd_fd >= 0 )
close(initrd_fd);
- if ( image != NULL )
- free(image);
+ free(image);
return -1;
}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/Makefile
--- a/tools/misc/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/misc/Makefile Thu Aug 25 22:53:20 2005
@@ -50,4 +50,4 @@
$(CC) -c $(CFLAGS) -o $@ $<
$(TARGETS): %: %.o Makefile
- $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc
+ $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/cpuperf/Makefile
--- a/tools/misc/cpuperf/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/misc/cpuperf/Makefile Thu Aug 25 22:53:20 2005
@@ -37,7 +37,7 @@
$(CC) $(CFLAGS) -o $@ $<
cpuperf-xen: cpuperf.c $(HDRS) Makefile
- $(CC) $(CFLAGS) -I $(XEN_LIBXC) -L$(XEN_LIBXC) -lxc -DXENO -o $@ $<
+ $(CC) $(CFLAGS) -I $(XEN_LIBXC) -L$(XEN_LIBXC) -lxenctrl -DXENO -o $@ $<
cpuperf-perfcntr: cpuperf.c $(HDRS) Makefile
$(CC) $(CFLAGS) -DPERFCNTR -o $@ $<
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/cpuperf/cpuperf_xeno.h
--- a/tools/misc/cpuperf/cpuperf_xeno.h Wed Aug 24 02:43:18 2005
+++ b/tools/misc/cpuperf/cpuperf_xeno.h Thu Aug 25 22:53:20 2005
@@ -9,7 +9,7 @@
*
*/
-#include <xc.h>
+#include <xenctrl.h>
static int xc_handle;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/xc_shadow.c
--- a/tools/misc/xc_shadow.c Wed Aug 24 02:43:18 2005
+++ b/tools/misc/xc_shadow.c Thu Aug 25 22:53:20 2005
@@ -11,7 +11,7 @@
*/
-#include <xc.h>
+#include <xenctrl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/xend
--- a/tools/misc/xend Wed Aug 24 02:43:18 2005
+++ b/tools/misc/xend Thu Aug 25 22:53:20 2005
@@ -24,6 +24,7 @@
import socket
import signal
import time
+import commands
XCS_PATH = "/var/lib/xen/xcs_socket"
XCS_EXEC = "/usr/sbin/xcs"
@@ -114,6 +115,17 @@
xcs_pidfile.close()
except:
return
+
+# Launch /usr/sbin/xenstored with its pid file at /var/run/xenstore.pid.
+# If XENSTORED_TRACE is set in the environment, the daemon is also given
+# "-T /var/log/xenstored-trace.log".
+def start_xenstored():
+ XENSTORED_TRACE = os.getenv("XENSTORED_TRACE")
+ cmd = "/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid"
+ if XENSTORED_TRACE:
+ cmd += " -T /var/log/xenstored-trace.log"
+ # NOTE(review): the exit status and output are captured but never
+ # inspected, so a failed launch is not reported to the caller.
+ s,o = commands.getstatusoutput(cmd)
+
+# Start /usr/sbin/xenconsoled in a forked child process.
+# The parent returns immediately; the child replaces itself with the daemon.
+def start_consoled():
+    if os.fork() == 0:
+        try:
+            os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled'])
+        except OSError, e:
+            sys.stderr.write("xend: could not exec xenconsoled: %s\n" % e)
+        # Only reached when exec failed. Leave without running any of the
+        # parent's cleanup so the child never continues as a second xend.
+        os._exit(1)
def main():
try:
@@ -130,9 +142,13 @@
return status >> 8
elif sys.argv[1] == 'start':
start_xcs()
+ start_xenstored()
+ start_consoled()
return daemon.start()
elif sys.argv[1] == 'trace_start':
start_xcs()
+ start_xenstored()
+ start_consoled()
return daemon.start(trace=1)
elif sys.argv[1] == 'stop':
stop_xcs()
@@ -140,6 +156,8 @@
elif sys.argv[1] == 'restart':
stop_xcs()
start_xcs()
+ start_xenstored()
+ start_consoled()
return daemon.stop() or daemon.start()
elif sys.argv[1] == 'status':
return daemon.status()
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/xenperf.c
--- a/tools/misc/xenperf.c Wed Aug 24 02:43:18 2005
+++ b/tools/misc/xenperf.c Thu Aug 25 22:53:20 2005
@@ -11,7 +11,7 @@
*/
-#include <xc.h>
+#include <xenctrl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/setup.py
--- a/tools/python/setup.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/setup.py Thu Aug 25 22:53:20 2005
@@ -17,7 +17,7 @@
XEN_ROOT + "/tools/xenstore",
]
-libraries = [ "xc", "xenstore-pic" ]
+libraries = [ "xenctrl", "xenguest", "xenstore" ]
xc = Extension("xc",
extra_compile_args = extra_compile_args,
@@ -41,7 +41,7 @@
sources = [ "xen/lowlevel/xs/xs.c" ])
setup(name = 'xen',
- version = '2.0',
+ version = '3.0',
description = 'Xen',
packages = ['xen',
'xen.lowlevel',
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Aug 25 22:53:20 2005
@@ -5,7 +5,8 @@
*/
#include <Python.h>
-#include <xc.h>
+#include <xenctrl.h>
+#include <xenguest.h>
#include <zlib.h>
#include <fcntl.h>
#include <netinet/in.h>
@@ -297,22 +298,23 @@
u32 dom;
char *image, *ramdisk = NULL, *cmdline = "";
PyObject *memmap;
- int control_evtchn, flags = 0;
+ int control_evtchn, store_evtchn;
+ int flags = 0, vcpus = 1;
int numItems, i;
int memsize;
struct mem_map mem_map;
-
- static char *kwd_list[] = { "dom", "control_evtchn",
- "memsize",
- "image", "memmap",
+ unsigned long store_mfn = 0;
+
+ static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
+ "memsize", "image", "memmap",
"ramdisk", "cmdline", "flags",
- NULL };
-
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiisO!|ssi", kwd_list,
- &dom, &control_evtchn,
+ "vcpus", NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisO!|ssii", kwd_list,
+ &dom, &control_evtchn, &store_evtchn,
&memsize,
&image, &PyList_Type, &memmap,
- &ramdisk, &cmdline, &flags) )
+ &ramdisk, &cmdline, &flags, &vcpus) )
return NULL;
memset(&mem_map, 0, sizeof(mem_map));
@@ -321,7 +323,6 @@
/* get the number of lines passed to us */
numItems = PyList_Size(memmap) - 1; /* removing the line
containing "memmap" */
- printf ("numItems: %d\n", numItems);
mem_map.nr_map = numItems;
/* should raise an error here. */
@@ -365,11 +366,11 @@
}
if ( xc_vmx_build(xc->xc_handle, dom, memsize, image, &mem_map,
- ramdisk, cmdline, control_evtchn, flags) != 0 )
- return PyErr_SetFromErrno(xc_error);
-
- Py_INCREF(zero);
- return zero;
+ ramdisk, cmdline, control_evtchn, flags,
+ vcpus, store_evtchn, &store_mfn) != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
}
static PyObject *pyxc_bvtsched_global_set(PyObject *self,
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/lowlevel/xs/xs.c Thu Aug 25 22:53:20 2005
@@ -1,6 +1,21 @@
/*
* Python interface to the Xen Store Daemon.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
* Copyright (C) 2005 Mike Wray Hewlett-Packard
+ *
*/
#include <Python.h>
@@ -253,12 +268,10 @@
}
val = PyList_New(perms_n);
for (i = 0; i < perms_n; i++, perms++) {
- PyObject *p = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i}",
- "dom", perms->id,
- "read", (perms->perms & XS_PERM_READ),
- "write", (perms->perms & XS_PERM_WRITE),
- "create", (perms->perms & XS_PERM_CREATE),
- "owner", (perms->perms & XS_PERM_OWNER));
+ PyObject *p = Py_BuildValue("{s:i,s:i,s:i}",
+ "dom", perms->id,
+ "read", (perms->perms & XS_PERM_READ),
+ "write", (perms->perms & XS_PERM_WRITE));
PyList_SetItem(val, i, p);
}
exit:
@@ -281,8 +294,7 @@
static char *arg_spec = "sO";
char *path = NULL;
PyObject *perms = NULL;
- static char *perm_names[] = { "dom", "read", "write", "create", "owner",
- NULL };
+ static char *perm_names[] = { "dom", "read", "write", NULL };
static char *perm_spec = "i|iiii";
struct xs_handle *xh = xshandle(self);
@@ -315,15 +327,9 @@
int dom = 0;
/* Read/write perms. Set these. */
int p_read = 0, p_write = 0;
- /* Create/owner perms. Ignore them.
- * This is so the output from get_permissions() can be used
- * as input to set_permissions().
- */
- int p_create = 0, p_owner = 0;
PyObject *p = PyList_GetItem(perms, i);
if (!PyArg_ParseTupleAndKeywords(tuple0, p, perm_spec, perm_names,
- &dom, &p_read, &p_write, &p_create,
- &p_owner))
+ &dom, &p_read, &p_write))
goto exit;
xsperms[i].id = dom;
if (p_read)
@@ -343,7 +349,6 @@
#define xspy_watch_doc "\n" \
"Watch a path, get notifications when it changes.\n" \
" path [string] : xenstore path.\n" \
- " priority [int] : watch priority (default 0).\n" \
" token [string] : returned in watch notification.\n" \
"\n" \
"Returns: [int] 0 on success.\n" \
@@ -352,10 +357,9 @@
static PyObject *xspy_watch(PyObject *self, PyObject *args, PyObject *kwds)
{
- static char *kwd_spec[] = { "path", "priority", "token", NULL };
+ static char *kwd_spec[] = { "path", "token", NULL };
static char *arg_spec = "s|is";
char *path = NULL;
- int priority = 0;
char *token = "";
struct xs_handle *xh = xshandle(self);
@@ -365,7 +369,7 @@
if (!xh)
goto exit;
if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
- &path, &priority, &token))
+ &path, &token))
goto exit;
xsval = xs_watch(xh, path, token);
val = pyvalue_int(xsval);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/lowlevel/xu/xu.c
--- a/tools/python/xen/lowlevel/xu/xu.c Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/lowlevel/xu/xu.c Thu Aug 25 22:53:20 2005
@@ -21,7 +21,7 @@
#include <unistd.h>
#include <errno.h>
#include <signal.h>
-#include <xc.h>
+#include <xenctrl.h>
#include <xen/xen.h>
#include <xen/io/domain_controller.h>
@@ -655,7 +655,9 @@
case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_CONNECT):
C2P(netif_fe_interface_connect_t, handle, Int, Long);
C2P(netif_fe_interface_connect_t, tx_shmem_frame, Int, Long);
+ C2P(netif_fe_interface_connect_t, tx_shmem_ref, Int, Long);
C2P(netif_fe_interface_connect_t, rx_shmem_frame, Int, Long);
+ C2P(netif_fe_interface_connect_t, rx_shmem_ref, Int, Long);
return dict;
case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_DISCONNECT):
C2P(netif_fe_interface_disconnect_t, handle, Int, Long);
@@ -681,7 +683,9 @@
C2P(netif_be_connect_t, domid, Int, Long);
C2P(netif_be_connect_t, netif_handle, Int, Long);
C2P(netif_be_connect_t, tx_shmem_frame, Int, Long);
+ C2P(netif_be_connect_t, tx_shmem_ref, Int, Long);
C2P(netif_be_connect_t, rx_shmem_frame, Int, Long);
+ C2P(netif_be_connect_t, rx_shmem_ref, Int, Long);
C2P(netif_be_connect_t, evtchn, Int, Long);
C2P(netif_be_connect_t, status, Int, Long);
return dict;
@@ -840,7 +844,7 @@
case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CONNECT):
P2C(blkif_be_connect_t, domid, u32);
P2C(blkif_be_connect_t, blkif_handle, u32);
- P2C(blkif_be_connect_t, shmem_frame, memory_t);
+ P2C(blkif_be_connect_t, shmem_frame, unsigned long);
P2C(blkif_be_connect_t, shmem_ref, u32);
P2C(blkif_be_connect_t, evtchn, u16);
break;
@@ -902,9 +906,11 @@
case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CONNECT):
P2C(netif_be_connect_t, domid, u32);
P2C(netif_be_connect_t, netif_handle, u32);
- P2C(netif_be_connect_t, tx_shmem_frame, memory_t);
- P2C(netif_be_connect_t, rx_shmem_frame, memory_t);
- P2C(netif_be_connect_t, evtchn, u16);
+ P2C(netif_be_connect_t, tx_shmem_frame, unsigned long);
+ P2C(netif_be_connect_t, tx_shmem_ref, u32);
+ P2C(netif_be_connect_t, rx_shmem_frame, unsigned long);
+ P2C(netif_be_connect_t, rx_shmem_ref, u32);
+ P2C(netif_be_connect_t, evtchn, u16);
break;
case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_DISCONNECT):
P2C(netif_be_disconnect_t, domid, u32);
@@ -936,7 +942,7 @@
P2C(usbif_fe_driver_status_changed_t, status, u32);
break;
case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_CONNECT):
- P2C(usbif_fe_interface_connect_t, shmem_frame, memory_t);
+ P2C(usbif_fe_interface_connect_t, shmem_frame, unsigned long);
break;
case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_DISCONNECT):
break;
@@ -950,7 +956,7 @@
break;
case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CONNECT):
P2C(usbif_be_connect_t, domid, domid_t);
- P2C(usbif_be_connect_t, shmem_frame, memory_t);
+ P2C(usbif_be_connect_t, shmem_frame, unsigned long);
P2C(usbif_be_connect_t, evtchn, u32);
P2C(usbif_be_connect_t, bandwidth, u32);
P2C(usbif_be_connect_t, status, u32);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/CreateDomain.py
--- a/tools/python/xen/sv/CreateDomain.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/CreateDomain.py Thu Aug 25 22:53:20 2005
@@ -17,26 +17,56 @@
CreateFinish ]
Wizard.__init__( self, urlWriter, "Create Domain", sheets )
-
+
+ def op_finish( self, request ):
+ pass
+
class CreatePage0( Sheet ):
+ title = "General"
+
def __init__( self, urlWriter ):
Sheet.__init__( self, urlWriter, "General", 0 )
self.addControl( InputControl( 'name', 'VM Name', 'VM Name:',
"[\\w|\\S]+", "You must enter a name in this field" ) )
self.addControl( InputControl( 'memory', '64', 'Memory (Mb):',
"[\\d]+", "You must enter a number in this field" ) )
self.addControl( InputControl( 'cpu', '0', 'CPU:', "[\\d]+", "You must
enter a number in this feild" ) )
self.addControl( InputControl( 'cpu_weight', '1', 'CPU Weight:',
"[\\d]+", "You must enter a number in this feild" ) )
+ self.addControl( InputControl( 'vcpus', '1', 'Virtual CPUs:',
'[\\d]+', "You must enter a number in this feild") )
class CreatePage1( Sheet ):
+ title = "Setup Kernel Image"
+
def __init__( self, urlWriter ):
Sheet.__init__( self, urlWriter, "Setup Kernel Image", 1 )
-# For now we don't need to select a builder...
-# self.addControl( ListControl( 'builder', [('linux', 'Linux'),
('netbsd', 'NetBSD')], 'Kernel Type:' ) )
- self.addControl( FileControl( 'kernel', '/boot/vmlinuz-2.6.9-xenU',
'Kernel Image:' ) )
+ self.addControl( ListControl( 'builder', [('linux', 'Linux'),
('netbsd', 'NetBSD')], 'Domain Builder:' ) )
+ self.addControl( FileControl( 'kernel', '/boot/vmlinuz-2.6.12-xenU',
'Kernel Image:' ) )
self.addControl( InputControl( 'extra', '', 'Kernel Command Line
Parameters:' ) )
+ self.addControl( ListControl( 'use-initrd', [('yes', 'Yes'), ('no',
'No')], 'Use an Initial Ram Disk?:' ) )
+ self.addControl( FileControl( 'initrd',
'/boot/initrd-2.6.12-xenU.img', 'Initial Ram Disk:' ) )
+
+ def validate( self, request ):
+     # Check every control on this sheet against the values carried in
+     # self.passback. Returns True when no control rejected its value.
+     if not self.passback: self.parseForm( request )
+     check = True
+     previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the map for quick reference
+     if DEBUG: print previous_values
+     for (feild, control) in self.feilds:
+         if feild == 'initrd' and previous_values.get( 'use-initrd' ) != 'no':
+             # NOTE(review): this looks like leftover debug output; the key
+             # '>>>>>use-initrd' is not set anywhere in the visible code.
+             request.write( previous_values.get( '>>>>>use-initrd' ) )
+             # NOTE(review): unlike the branch below this has no "not", so a
+             # value that validates marks the sheet invalid - confirm intent.
+             if control.validate( previous_values.get( feild ) ):
+                 check = False
+         elif not control.validate( previous_values.get( feild ) ):
+             check = False
+
+         if DEBUG: print "> %s = %s" % (feild, previous_values.get( feild ))
+
+     return check
+
class CreatePage2( Sheet ):
+
+ title = "Choose number of VBDS"
def __init__( self, urlWriter ):
Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 2 )
@@ -44,10 +74,12 @@
class CreatePage3( Sheet ):
+ title = "Setup VBDS"
+
def __init__( self, urlWriter ):
Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 3 )
- def write_BODY( self, request, err ):
+ def write_BODY( self, request ):
if not self.passback: self.parseForm( request )
previous_values = sxp2hash( string2sxp( self.passback ) ) #get the hash
for quick reference
@@ -61,9 +93,11 @@
self.addControl( InputControl( 'root', '/dev/sda1', 'Root device (in
VM):' ) )
- Sheet.write_BODY( self, request, err )
+ Sheet.write_BODY( self, request )
class CreatePage4( Sheet ):
+
+ title = "Network Setting"
def __init__( self, urlWriter ):
Sheet.__init__( self, urlWriter, "Network settings", 4 )
@@ -76,26 +110,27 @@
class CreateFinish( Sheet ):
+ title = "Finish"
+
def __init__( self, urlWriter ):
Sheet.__init__( self, urlWriter, "All Done", 5 )
- def write_BODY( self, request, err ):
+ def write_BODY( self, request ):
if not self.passback: self.parseForm( request )
xend_sxp = self.translate_sxp( string2sxp( self.passback ) )
+
+ request.write( "<pre>%s</pre>" % sxp2prettystring( xend_sxp ) )
try:
- dom_sxp = server.xend_domain_create( xend_sxp )
- success = "Your domain was successfully created.\n"
- except:
- success = "There was an error creating your domain.\nThe
configuration used is as follows:\n"
- dom_sxp = xend_sxp
-
-
-
- pt = PreTab( success + sxp2prettystring( dom_sxp ) )
- pt.write_BODY( request )
+ server.xend_domain_create( xend_sxp )
+ request.write( "<p>You domain had been successfully created.</p>" )
+ except Exception, e:
+ request.write( "<p>There was an error creating your
domain.<br/>The configuration used is as follows:\n</p>" )
+ request.write( "<pre>%s</pre>" % sxp2prettystring( xend_sxp ) )
+ request.write( "<p>The error was:</p>" )
+ request.write( "<pre>%s</pre>" % str( e ) )
request.write( "<input type='hidden' name='passback'
value=\"%s\"></p>" % self.passback )
request.write( "<input type='hidden' name='sheet' value='%s'></p>" %
self.location )
@@ -117,6 +152,7 @@
vals.maxmem = get( 'maxmem' )
vals.cpu = get( 'cpu' )
vals.cpu_weight = get( 'cpu_weight' )
+ vals.vcpus = get( 'vcpus' )
vals.builder = get( 'builder' )
vals.kernel = get( 'kernel' )
@@ -128,7 +164,7 @@
vbds = []
for i in range( int( get( 'num_vbds' ) ) ):
- vbds.append( ( get( 'vbd%s_dom0' % i ), get('vbd%s_domU' % i ),
get( 'vbd%s_mode' % i ) ) )
+ vbds.append( ( get( 'vbd%s_dom0' % i ), get('vbd%s_domU' % i ),
get( 'vbd%s_mode' % i ), None ) )
vals.disk = vbds
@@ -141,6 +177,9 @@
vals.restart = None
vals.console = None
vals.ramdisk = None
+ vals.ssidref = -1
+ vals.bootloader = None
+ vals.usb = []
#setup vifs
@@ -155,9 +194,11 @@
dhcp = get( 'dhcp' )
vals.cmdline_ip = "%s:%s:%s:%s:%s:eth0:%s" % (ip, nfs, gate, mask,
host, dhcp)
+
+ opts = None
try:
- return make_config( vals )
- except:
- return [["Error creating domain config."]]
-
+ return make_config( opts, vals )
+ except Exception, e:
+ return [["There was an error creating the domain config SXP. This
is typically due to an interface change in xm/create.py:make_config", e]]
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/DomInfo.py
--- a/tools/python/xen/sv/DomInfo.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/DomInfo.py Thu Aug 25 22:53:20 2005
@@ -4,6 +4,7 @@
from xen.sv.HTMLBase import HTMLBase
from xen.sv.util import *
from xen.sv.GenTabbed import *
+from xen.sv.Wizard import *
DEBUG=1
@@ -12,33 +13,69 @@
def __init__( self, urlWriter ):
self.dom = 0;
-
- def tabUrlWriter( tab ):
- return urlWriter( "&dom=%s%s" % ( self.dom, tab ) )
-
- GenTabbed.__init__( self, "Domain Info", tabUrlWriter, [ 'General',
'SXP', 'Devices' ], [ DomGeneralTab, DomSXPTab, NullTab ] )
+
+ GenTabbed.__init__( self, "Domain Info", urlWriter, [ 'General',
'SXP', 'Devices', 'Migrate', 'Save' ], [ DomGeneralTab, DomSXPTab,
DomDeviceTab, DomMigrateTab, DomSaveTab ] )
def write_BODY( self, request ):
- dom = request.args.get('dom')
-
- if dom is None or len(dom) != 1:
+ try:
+ dom = int( getVar( 'dom', request ) )
+ except:
request.write( "<p>Please Select a Domain</p>" )
return None
- else:
- self.dom = dom[0]
-
+
GenTabbed.write_BODY( self, request )
def write_MENU( self, request ):
- pass
-
+ domains = []
+
+ try:
+ domains = server.xend_domains()
+ domains.sort()
+ except:
+ pass
+
+ request.write( "\n<table style='border:0px solid white' cellspacing='0'
cellpadding='0' border='0' width='100%'>\n" )
+ request.write( "<tr class='domainInfoHead'>" )
+ request.write( "<td class='domainInfoHead'
align='center'>Domain</td>\n" )
+ request.write( "<td class='domainInfoHead' align='center'>Name</td>\n" )
+ request.write( "<td class='domainInfoHead' align='center'>State</td>\n"
)
+ request.write( "<td class='domainInfoHead' align='center'></td>\n" )
+ request.write( "</tr>" )
+
+ odd = True
+ if not domains is None:
+ for domain in domains:
+ odd = not odd;
+ if odd:
+ request.write( "<tr class='domainInfoOdd'>\n" )
+ else:
+ request.write( "<tr class='domainInfoEven'>\n" )
+ domInfo = getDomInfo( domain )
+ request.write( "<td class='domainInfo'
align='center'>%(id)s</td>\n" % domInfo )
+ url = self.urlWriter( "&dom=%(id)s" % domInfo )
+ request.write( "<td class='domainInfo' align='center'><a
href='%s'>%s</a></td>\n" % ( url, domInfo['name'] ) )
+ request.write( "<td class='domainInfo'
align='center'>%(state)5s</td>\n" % domInfo )
+ if domInfo[ 'id' ] != "0":
+ request.write( "<td class='domainInfo' align='center'>" )
+ if domInfo[ 'state' ][ 2 ] == "-":
+ request.write( "<img src='images/small-pause.png'
onclick='doOp2( \"pause\", \"%(dom)-4s\" )'>" % domInfo )
+ else:
+ request.write( "<img src='images/small-unpause.png'
onclick='doOp2( \"unpause\", \"%(dom)-4s\" )'>" % domInfo )
+ request.write( "<img src='images/small-destroy.png'
onclick='doOp2( \"destroy\", \"%(dom)-4s\" )'></td>" % domInfo )
+ else:
+ request.write( "<td> </td>" )
+ request.write( "</tr>\n" )
+ else:
+ request.write( "<tr colspan='10'><p class='small'>Error getting
domain list<br/>Perhaps XenD not running?</p></tr>")
+ request.write( "</table>" )
+
class DomGeneralTab( CompositeTab ):
- def __init__( self ):
- CompositeTab.__init__( self, [ DomGenTab, DomActionTab ] )
-
+ def __init__( self, urlWriter ):
+ CompositeTab.__init__( self, [ DomGenTab, DomActionTab ], urlWriter )
+
class DomGenTab( GeneralTab ):
- def __init__( self ):
+ def __init__( self, urlWriter ):
titles = {}
@@ -60,13 +97,13 @@
request.write( "<p>Please Select a Domain</p>" )
return None
- self.dict = getDomInfoHash( self.dom )
+ self.dict = getDomInfo( self.dom )
GeneralTab.write_BODY( self, request )
class DomSXPTab( PreTab ):
- def __init__( self ):
+ def __init__( self, urlWriter ):
self.dom = 0
PreTab.__init__( self, "" )
@@ -86,15 +123,15 @@
self.source = sxp2prettystring( domInfo )
PreTab.write_BODY( self, request )
-
+
class DomActionTab( ActionTab ):
- def __init__( self ):
- actions = { "shutdown" : "shutdown",
- "reboot" : "reboot",
- "pause" : "pause",
- "unpause" : "unpause",
- "destroy" : "destroy" }
+ def __init__( self, urlWriter ):
+ actions = { "shutdown" : "Shutdown",
+ "reboot" : "Reboot",
+ "pause" : "Pause",
+ "unpause" : "Unpause",
+ "destroy" : "Destroy" }
ActionTab.__init__( self, actions )
def op_shutdown( self, request ):
@@ -141,8 +178,91 @@
server.xend_domain_destroy( int( dom ), "halt" )
except:
pass
-
-
-
-
-
+
+class DomDeviceTab( CompositeTab ):
+
+ def __init__( self, urlWriter ):
+ CompositeTab.__init__( self, [ DomDeviceListTab, DomDeviceOptionsTab,
DomDeviceActionTab ], urlWriter )
+
+class DomDeviceListTab( NullTab ):
+
+ title = "Device List"
+
+ def __init__( self, urlWriter ):
+ pass
+
+class DomDeviceOptionsTab( NullTab ):
+
+ title = "Device Options"
+
+ def __init__( self, urlWriter ):
+ pass
+
+class DomDeviceActionTab( ActionTab ):
+
+ def __init__( self, urlWriter ):
+ ActionTab.__init__( self, { "addvcpu" : "Add VCPU", "addvbd" : "Add
VBD", "addvif" : "Add VIF" } )
+
+class DomMigrateTab( CompositeTab ):
+
+ def __init__( self, urlWriter ):
+ CompositeTab.__init__( self, [ DomMigrateExtraTab, DomMigrateActionTab
], urlWriter )
+
+class DomMigrateExtraTab( Sheet ):
+
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "Configure Migration", 0)
+ self.addControl( TickControl('live', 'True', 'Live migrate:') )
+ self.addControl( InputControl('rate', '0', 'Rate limit:') )
+ self.addControl( InputControl( 'dest', 'host.domain', 'Name or IP
address:', ".*") )
+
+# Action tab that registers a single action, "migrate", labelled "Migrate".
+class DomMigrateActionTab( ActionTab ):
+
+ # urlWriter is accepted but not used by this constructor.
+ def __init__( self, urlWriter ):
+ actions = { "migrate" : "Migrate" }
+ ActionTab.__init__( self, actions )
+
+ # Reads dom, live, rate and dest from the request and asks xend to
+ # migrate the domain. live is passed on as the boolean (live == 'True');
+ # rate is passed on exactly as getVar returned it.
+ def op_migrate( self, request ):
+ try:
+ domid = int( getVar( 'dom', request ) )
+ live = getVar( 'live', request )
+ rate = getVar( 'rate', request )
+ dest = getVar( 'dest', request )
+ dom_sxp = server.xend_domain_migrate( domid, dest, live == 'True',
rate )
+ success = "Your domain was successfully Migrated.\n"
+ except Exception, e:
+ # NOTE(review): success and dom_sxp are assigned on both paths but
+ # nothing in this method writes them to the request.
+ success = "There was an error migrating your domain\n"
+ dom_sxp = str(e)
+
+class DomSaveTab( CompositeTab ):
+
+ def __init__( self, urlWriter ):
+ CompositeTab.__init__( self, [ DomSaveExtraTab, DomSaveActionTab ],
urlWriter )
+
+class DomSaveExtraTab( Sheet ):
+
+ title = "Save location"
+
+ def __init__( self, urlWriter ):
+ Sheet.__init__( self, urlWriter, "Save Domain to file", 0 )
+ self.addControl( InputControl( 'file', '', 'Suspend file name:', ".*")
)
+
+# Action tab that registers a single action, "save", labelled "Save".
+class DomSaveActionTab( ActionTab ):
+
+    # urlWriter is accepted but not used by this constructor.
+    def __init__( self, urlWriter ):
+        actions = { "save" : "Save" }
+        ActionTab.__init__( self, actions )
+
+    # Asks xend to save the selected domain to the requested file.
+    # The domain id and file name are read from the request, the same way
+    # op_migrate reads its arguments.
+    # NOTE(review): 'file' is the control name declared by DomSaveExtraTab;
+    # confirm that the sheet posts it as a plain form variable.
+    def op_save( self, request ):
+        try:
+            dom = int( getVar( 'dom', request ) )
+            dom_sxp = server.xend_domain_save( dom, getVar( 'file', request ) )
+            success = "Your domain was successfully saved.\n"
+        except Exception, e:
+            # A missing or non-numeric 'dom' also ends up here.
+            success = "There was an error saving your domain\n"
+            dom_sxp = str(e)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/GenTabbed.py
--- a/tools/python/xen/sv/GenTabbed.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/GenTabbed.py Thu Aug 25 22:53:20 2005
@@ -1,7 +1,6 @@
import types
from xen.sv.HTMLBase import HTMLBase
-from xen.sv.TabView import TabView
from xen.sv.util import getVar
class GenTabbed( HTMLBase ):
@@ -12,39 +11,44 @@
self.tabObjects = tabObjects
self.urlWriter = urlWriter
self.title = title
+
+ def write_BODY( self, request ):
+ if not self.__dict__.has_key( "tab" ):
+ try:
+ self.tab = int( getVar( 'tab', request, 0 ) )
+ except:
+ self.tab = 0
+
+ request.write( "\n<div class='title'>%s</div>" % self.title )
+
+ TabView( self.tab, self.tabStrings, self.urlWriter ).write_BODY(
request )
+
+ try:
+ request.write( "\n<div class='tab'>" )
+ render_tab = self.tabObjects[ self.tab ]
+ render_tab( self.urlWriter ).write_BODY( request )
+ request.write( "\n</div>" )
+ except Exception, e:
+ request.write( "\n<p>Error Rendering Tab</p>" )
+ request.write( "\n<p>%s</p>" % str( e ) )
- def write_BODY( self, request, urlWriter = None ):
- try:
- tab = int( getVar( 'tab', request, 0 ) )
- except:
- tab = 0
-
- request.write( "<table style='' width='100%' border='0'
cellspacing='0' cellpadding='0'>" )
- request.write( "<tr><td>" )
- request.write( "<p align='center'><u>%s</u></p>" % self.title )
-
- TabView( tab, self.tabStrings, self.urlWriter ).write_BODY( request )
-
- request.write( "</td></tr><tr><td>" )
+ request.write( "\n<input type=\"hidden\" name=\"tab\" value=\"%d\">" %
self.tab )
+
+ def perform( self, request ):
+ # Handle a posted operation. When op is "tab" the selected tab becomes
+ # the index given in args; otherwise the index is read from the tab field.
+ request.write( "Tab> perform" )
+ request.write( "<br/>op: " + str( getVar( 'op', request ) ) )
+ request.write( "<br/>args: " + str( getVar( 'args', request ) ) )
+ request.write( "<br/>tab: " + str( getVar( 'tab', request ) ) )
try:
- render_tab = self.tabObjects[ tab ]
- render_tab().write_BODY( request )
+ action = getVar( 'op', request, 0 )
+ if action == "tab":
+ self.tab = int( getVar( 'args', request ) )
+ else:
+ self.tab = int( getVar( 'tab', request, 0 ) )
+ self.tabObjects[ self.tab ]( self.urlWriter ).perform( request
)
except:
- request.write( "<p>Error Rendering Tab</p>" )
-
- request.write( "</td></tr></table>" )
-
- def perform( self, request ):
- try:
- tab = int( getVar( 'tab', request, 0 ) )
- except:
- tab = 0;
-
- op_tab = self.tabObjects[ tab ]
-
- if op_tab:
- op_tab().perform( request )
+ # NOTE(review): every failure above is discarded silently here.
+ pass
class PreTab( HTMLBase ):
@@ -53,12 +57,9 @@
self.source = source
def write_BODY( self, request ):
-
- request.write( "<div style='display: block; overflow: auto; border:
0px solid black; width: 540px; padding: 5px; z-index:0; align: center'><pre>" )
-
+ request.write( "\n<pre>" )
request.write( self.source )
-
- request.write( "</pre></div>" )
+ request.write( "\n</pre>" )
class GeneralTab( HTMLBase ):
@@ -69,7 +70,7 @@
def write_BODY( self, request ):
- request.write( "<table width='100%' cellspacing='0' cellpadding='0'
border='0'>" )
+ request.write( "\n<table width='100%' cellspacing='0' cellpadding='0'
border='0'>" )
def writeAttr( niceName, attr, formatter=None ):
if type( attr ) is types.TupleType:
@@ -80,7 +81,7 @@
temp = formatter( self.dict[ attr ] )
else:
temp = str( self.dict[ attr ] )
- request.write( "<tr><td width='50%%'><p>%s:</p></td><td
width='50%%'><p>%s</p></td></tr>" % ( niceName, temp ) )
+ request.write( "\n<tr><td width='50%%'><p>%s:</p></td><td
width='50%%'><p>%s</p></td></tr>" % ( niceName, temp ) )
for niceName, attr in self.titles.items():
writeAttr( niceName, attr )
@@ -89,16 +90,12 @@
class NullTab( HTMLBase ):
- def __init__( self ):
- HTMLBase.__init__( self )
- self.title = "Null Tab"
-
- def __init__( self, title ):
+ def __init__( self, title="Null Tab" ):
HTMLBase.__init__( self )
self.title = title
-
+
def write_BODY( self, request ):
- request.write( "<p>%s</p>" % self.title )
+ request.write( "\n<p>%s</p>" % self.title )
class ActionTab( HTMLBase ):
@@ -107,29 +104,44 @@
HTMLBase.__init__( self )
def write_BODY( self, request ):
- request.write( "<p align='center'><table cellspacing='3'
cellpadding='2' border='0'><tr>" )
-
- for ( command, text ) in self.actions.items():
- request.write( "<td style='border: 1px solid black;
background-color: grey' onmouseover='buttonMouseOver( this )'
onmouseout='buttonMouseOut( this )'>" )
- request.write( "<p><a href='javascript: doOp( \"%s\"
);'>%s</a></p></td>" % (command, text) )
-
- request.write("</table></p>")
-
+ for item in self.actions.items():
+ try:
+ ((op, attr), title) = item
+ except:
+ (op, title) = item
+ attr = ""
+ request.write( "\n<div class='button' onclick=\"doOp2( '%s', '%s'
)\">%s</a></div>" % (op, attr, title) )
+
class CompositeTab( HTMLBase ):
- def __init__( self, tabs ):
+ def __init__( self, tabs, urlWriter ):
HTMLBase.__init__( self )
self.tabs = tabs
+ self.urlWriter = urlWriter
def write_BODY( self, request ):
for tab in self.tabs:
- request.write( "<br/>" )
- tab().write_BODY( request )
+ tab( self.urlWriter ).write_BODY( request )
def perform( self, request ):
for tab in self.tabs:
- tab().perform( request )
-
-
-
-
+ tab( self.urlWriter ).perform( request )
+
+# Renders the row of tab buttons, one <div class='tabButton'> per tab name.
+class TabView( HTMLBase ):
+
+ # tab - int, index into tabs of the selected tab
+ # tabs - list of strings, tab names
+ # urlWriter - stored on the instance; not used by write_BODY
+ def __init__( self, tab, tabs, urlWriter ):
+ HTMLBase.__init__(self)
+ self.tab = tab
+ self.tabs = tabs
+ self.urlWriter = urlWriter
+
+ # Writes one button per tab. The selected tab also gets id='activeTab',
+ # and each button's onclick calls doOp2 with 'tab' and the tab's index.
+ def write_BODY( self, request ):
+ for i in range( len( self.tabs ) ):
+ if self.tab == i:
+ at = " id='activeTab'"
+ else:
+ at = ""
+ request.write( "\n<div%s class='tabButton' onclick=\"doOp2(
'tab', '%d' )\">%s</div>" % ( at, i, self.tabs[ i ] ) )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/HTMLBase.py
--- a/tools/python/xen/sv/HTMLBase.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/HTMLBase.py Thu Aug 25 22:53:20 2005
@@ -12,26 +12,17 @@
return self.render_GET( request )
def render_GET( self, request ):
- self.write_TOP( request )
- self.write_BODY( request )
- self.write_BOTTOM( request )
- return ''
-
+ pass
+
def write_BODY( self, request ):
- request.write( "BODY" )
+ pass
def write_TOP( self, request ):
- request.write( '<html><head><title>Xen</title><link rel="stylesheet"
type="text/css" href="inc/style.css" />' )
- request.write( '<script src="inc/script.js"></script>' )
- request.write( '</head><body>' )
- request.write('<form method="post" action="%s">' % request.uri)
-
+ pass
+
def write_BOTTOM( self, request ):
- request.write('<input type="hidden" name="op" value="">')
- request.write('<input type="hidden" name="args" value="">')
- request.write('</form>')
- request.write( "</body></html>" )
-
+ pass
+
def get_op_method(self, op):
"""Get the method for an operation.
For operation 'foo' looks for 'op_foo'.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/Main.py
--- a/tools/python/xen/sv/Main.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/Main.py Thu Aug 25 22:53:20 2005
@@ -1,113 +1,90 @@
+
from xen.sv.HTMLBase import HTMLBase
-from xen.sv.DomList import DomList
from xen.sv.NodeInfo import NodeInfo
from xen.sv.DomInfo import DomInfo
from xen.sv.CreateDomain import CreateDomain
-from xen.sv.MigrateDomain import MigrateDomain
-from xen.sv.SaveDomain import SaveDomain
from xen.sv.RestoreDomain import RestoreDomain
-
-from xen.xend.XendClient import server
from xen.sv.util import getVar
-class Main( HTMLBase ):
-
- isLeaf = True
+# adapter to make this all work with mod_python
+# as opposed to Twisted
+# (c) Tom Wilkie 2005
- def __init__( self, urlWriter = None ):
+class Args:
+ def __init__( self, req ):
+ from mod_python.util import FieldStorage
+ self.fieldStorage = FieldStorage( req, True )
+
+ # return a list of values for the given key,
+ # or None if key not there
+ def get( self, var ):
+ retVar = self.fieldStorage.getlist( var )
+ if len( retVar ) == 0:
+ return None
+ else:
+ return retVar
+
+ # return a list of tuples,
+ # (key, value) where value is a list of values
+ def items( self ):
+ result = [];
+ for key in self.fieldStorage.keys():
+ result.append( (key, self.fieldStorage.getlist( key ) ) )
+ return result
+
+class TwistedAdapter:
+ def __init__( self, req ):
+ self.args = Args( req )
+ self.uri = req.unparsed_uri
+ self.url = req.uri
+ self.write = req.write
+
+# This is the Main class
+# It pieces together all the modules
+
+class Main:
+ def __init__( self ):
self.modules = { "node": NodeInfo,
- "list": DomList,
- "info": DomInfo,
"create": CreateDomain,
- "migrate" : MigrateDomain,
- "save" : SaveDomain,
- "restore" : RestoreDomain }
+ "restore" : RestoreDomain,
+ "info": DomInfo }
- # ordered list of module menus to display
- self.module_menus = [ "node", "create", "migrate", "save",
- "restore", "list" ]
- HTMLBase.__init__(self)
-
- def render_POST( self, request ):
-
- #decide what module post'd the action
-
- args = getVar( 'args', request )
+ self.init_done = False
- mod = getVar( 'mod', request )
-
- if not mod is None and args is None:
- module = self.modules[ mod ]
- #check module exists
- if module:
- module( self.mainUrlWriter ).perform( request )
- else:
- self.perform( request )
-
- return self.render_GET( request )
+ def init_modules( self, request ):
+ for moduleName, module in self.modules.iteritems():
+ self.modules[ moduleName ] = module( self.urlWriter( moduleName,
request.url ) )
- def mainUrlWriter( self, module ):
- def fun( f ):
- return "Main.rpy?mod=%s%s" % ( module, f )
- return fun
-
- def write_BODY( self, request ):
-
- request.write( "\n<table style='border:0px solid black; background:
url(images/orb_01.jpg) no-repeat' cellspacing='0' cellpadding='0' border='0'
width='780px' height='536px'>\n" )
- request.write( "<tr>\n" )
- request.write( " <td width='15px'> </td>" )
- request.write( " <td width='175px' align='center' valign'center'>" )
- request.write( " <table cellspacing='0' cellpadding='0' border='0'
width='100%' height='100%'>" )
- request.write( " <tr><td height='140px' align='center'
valign='bottom'><a href='http://www.cl.cam.ac.uk/Research/SRG/netos/xen/'>" )
- request.write( " <img src='images/xen.png' width='150' height='75'
border='0'/></a><br/></td></tr>" )
- request.write( " <tr><td height='60px' align='center'><p
class='small'>SV Web Interface<br/>(C) <a href='mailto:tw275@xxxxxxxxx'>Tom
Wilkie</a> 2004</p></td></tr>")
- request.write( " <tr><td align='center' valign='top'>" )
+ def render_menu( self, request ):
+ if not self.init_done:
+ self.init_modules( request )
+ self.init_done = True
+
+ for moduleName, module in self.modules.iteritems():
+ module.write_MENU( request )
+ request.write( "\n" )
- for modName in self.module_menus:
- self.modules[modName]( self.mainUrlWriter( modName ) ).write_MENU(
request )
-
- request.write( " </td></tr>" )
- request.write( " </table>" )
- request.write( " " )
- request.write( " </td>\n" )
- request.write( " <td width='15px'> </td>" )
- request.write( " <td width='558px' align='left' valign='top'>" )
- request.write( " <table cellspacing='0' cellpadding='0' border='0'
width='100%' height='100%'>" )
- request.write( " <tr><td height='20px'></td></tr>" )
- request.write( " <tr><td align='center' valign='top'>" )
-
- modName = getVar('mod', request)
-
- if modName is None:
+ def render_main( self, request ):
+ if not self.init_done:
+ self.init_modules( request )
+ self.init_done = True
+
+ moduleName = getVar('mod', request)
+ if moduleName not in self.modules:
request.write( '<p>Please select a module</p>' )
else:
- module = self.modules[ modName ]
- if module:
- module( self.mainUrlWriter( modName ) ).write_BODY( request )
- else:
- request.write( '<p>Invalid module. Please select another</p>' )
-
- request.write( " </td></tr>" )
- request.write( " </table>" )
- request.write( " </td>\n" )
- request.write( " <td width='17px'> </td>" )
- request.write( "</tr>\n" )
+ module = self.modules[ moduleName ]
+ module.write_BODY( request )
+
+ def do_POST( self, request ):
+ if not self.init_done:
+ self.init_modules( request )
+ self.init_done = True
- request.write( "</table>\n" )
-
-
- def op_destroy( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != "0":
- server.xend_domain_destroy( int( dom ), "halt" )
-
- def op_pause( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != "0":
- server.xend_domain_pause( int( dom ) )
-
- def op_unpause( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != "0":
- server.xend_domain_unpause( int( dom ) )
+ moduleName = getVar( 'mod', request )
+ if moduleName in self.modules:
+ self.modules[ moduleName ].perform( request )
+
+ def urlWriter( self, module, url ):
+ return lambda x: "%s?mod=%s%s" % ( url, module, x )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/NodeInfo.py
--- a/tools/python/xen/sv/NodeInfo.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/NodeInfo.py Thu Aug 25 22:53:20 2005
@@ -6,18 +6,18 @@
class NodeInfo( GenTabbed ):
def __init__( self, urlWriter ):
- GenTabbed.__init__( self, "Node Details", urlWriter, [ 'General',
'Dmesg', ], [ NodeGeneralTab, NodeDmesgTab ] )
+ GenTabbed.__init__( self, "Node Details", urlWriter, [ 'General',
'Dmesg', 'SXP' ], [ NodeGeneralTab, NodeDmesgTab, NodeSXPTab ] )
def write_MENU( self, request ):
request.write( "<p class='small'><a href='%s'>Node details</a></p>" %
self.urlWriter( '' ) )
class NodeGeneralTab( CompositeTab ):
- def __init__( self ):
- CompositeTab.__init__( self, [ NodeInfoTab, NodeActionTab ] )
+ def __init__( self, urlWriter ):
+ CompositeTab.__init__( self, [ NodeInfoTab, NodeActionTab ], urlWriter
)
class NodeInfoTab( GeneralTab ):
- def __init__( self ):
+ def __init__( self, urlWriter ):
nodeInfo = {}
try:
@@ -41,7 +41,7 @@
class NodeDmesgTab( PreTab ):
- def __init__( self ):
+ def __init__( self, urlWriter ):
try:
dmesg = server.xend_node_get_dmesg()
except:
@@ -50,7 +50,7 @@
class NodeActionTab( ActionTab ):
- def __init__( self ):
+ def __init__( self, urlWriter ):
ActionTab.__init__( self, { "shutdown" : "shutdown",
"reboot" : "reboot" } )
@@ -61,3 +61,13 @@
def op_reboot( self, request ):
if debug: print ">NodeReboot"
server.xend_node_reboot()
+
+class NodeSXPTab( PreTab ):
+
+ def __init__( self, urlWriter ):
+ try:
+ nodeSXP = sxp2string( server.xend_node() )
+ except:
+ nodeSXP = 'Error getting node sxp'
+
+ PreTab.__init__( self, nodeSXP )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/RestoreDomain.py
--- a/tools/python/xen/sv/RestoreDomain.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/RestoreDomain.py Thu Aug 25 22:53:20 2005
@@ -16,6 +16,8 @@
class ChooseRestoreDomain( Sheet ):
+ title = "Configure Restore"
+
def __init__( self, urlWriter ):
Sheet.__init__( self, urlWriter, "Configure Restore", 0)
@@ -24,6 +26,8 @@
".*") )
class DoRestore( Sheet ):
+ title = "Restore Done"
+
def __init__(self, urlWriter ):
Sheet.__init__(self, urlWriter, "Restore Done", 1)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/Wizard.py
--- a/tools/python/xen/sv/Wizard.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/Wizard.py Thu Aug 25 22:53:20 2005
@@ -1,71 +1,44 @@
from xen.sv.util import *
from xen.sv.HTMLBase import HTMLBase
+from xen.sv.GenTabbed import GenTabbed, ActionTab
from xen.xend import sxp
import re
DEBUG = 0
-class Wizard( HTMLBase ):
+class Wizard( GenTabbed ):
def __init__( self, urlWriter, title, sheets ):
- HTMLBase.__init__( self )
self.title = title
self.sheets = sheets
self.urlWriter = urlWriter
+ self.offset = 0
+ GenTabbed.__init__( self, title, urlWriter, map( lambda x: x.title,
sheets ), sheets )
def write_MENU( self, request ):
request.write( "<p class='small'><a href='%s'>%s</a></p>" %
(self.urlWriter( '' ), self.title) )
def write_BODY( self, request ):
-
- request.write( "<table width='100%' border='0' cellspacing='0'
cellpadding='0'><tr><td>" )
- request.write( "<p align='center'><u>%s</u></p></td></tr><tr><td>" %
self.title )
-
- currSheet = getVar( 'sheet', request )
-
- if not currSheet is None:
- currSheet = int( currSheet )
- else:
- currSheet = 0
-
- sheet = self.sheets[ currSheet ]( self.urlWriter )
-
- err = not sheet.validate( request )
-
- if not err:
- op = getVar( 'op', request )
-
- if op == 'next':
- currSheet += 1
- elif op == 'prev':
- currSheet -= 1
-
- sheet = self.sheets[ currSheet ]( self.urlWriter )
-
- if getVar( 'visited-sheet%s' % currSheet, request ):
- sheet.write_BODY( request, err )
- else:
- sheet.write_BODY( request, False )
-
-
- request.write( "</td></tr><tr><td><table width='100%' border='0'
cellspacing='0' cellpadding='0'><tr>" )
- request.write( "<td width='80%'></td><td width='20%' align='center'><p
align='center'>" )
- if currSheet > 0:
- request.write( "<img src='images/previous.png'
onclick='doOp( \"prev\" )' onmouseover='update( \"wizText\", \"Previous\" )'
onmouseout='update( \"wizText\", \" \" )'> " )
- if currSheet < ( len( self.sheets ) - 2 ):
- request.write( "<img src='images/next.png' onclick='doOp( \"next\"
)' onmouseover='update( \"wizText\", \"Next\" )' onmouseout='update(
\"wizText\", \" \" )'>" )
- elif currSheet == ( len( self.sheets ) - 2 ):
- request.write( "<img src='images/finish.png' onclick='doOp(
\"next\" )' onmouseover='update( \"wizText\", \"Finish\" )' onmouseout='update(
\"wizText\", \" \" )'>" )
- request.write( "</p><p align='center'><span
id='wizText'></span></p></td></tr></table>" )
- request.write( "</td></tr></table>" )
-
- def op_next( self, request ):
- pass
-
- def op_prev( self, request ):
- pass
-
+ GenTabbed.write_BODY( self, request )
+ actionTab = ActionTab( { ("tab", str(self.tab-1)) : "< Prev", ("tab",
str(self.tab+1)) : "Next >", "finish" : "Finish" } )
+ actionTab.write_BODY( request )
+
+ def perform( self, request ):
+ try:
+ action = getVar( 'op', request, 0 )
+ if action == "tab":
+ self.tab = int( getVar( 'args', request ) )
+ oldtab = int( getVar( 'tab', request ) )
+ if not self.tabObjects[ oldtab ]( self.urlWriter ).validate(
request ):
+ self.tab = oldtab
+ else:
+ self.tab = int( getVar( 'tab', request, 0 ) )
+ self.tabObjects[ self.tab ]( self.urlWriter ).perform( request
)
+ getattr( self, "op_" + getVar( "op", request ), None )(
request )
+ except:
+ pass
+
def op_finish( self, request ):
pass
@@ -80,7 +53,7 @@
self.passback = None
def parseForm( self, request ):
- do_not_parse = [ 'mod', 'op', 'sheet', 'passback' ]
+ do_not_parse = [ 'mod', 'op', 'passback' ]
passed_back = request.args
@@ -103,7 +76,7 @@
if DEBUG: print self.passback
- def write_BODY( self, request, err ):
+ def write_BODY( self, request ):
if not self.passback: self.parseForm( request )
@@ -115,14 +88,13 @@
for (feild, control) in self.feilds:
control.write_Control( request, previous_values.get( feild ) )
- if err and not control.validate( previous_values.get( feild ) ):
+ if previous_values.get( feild ) is not None and not
control.validate( previous_values.get( feild ) ):
control.write_Help( request )
request.write( "</table>" )
request.write( "<input type='hidden' name='passback'
value=\"%s\"></p>" % self.passback )
- request.write( "<input type='hidden' name='sheet' value='%s'></p>" %
self.location )
- request.write( "<input type='hidden' name='visited-sheet%s'
value='True'></p>" % self.location )
+ #request.write( "<input type='hidden' name='visited-sheet%s'
value='True'></p>" % self.location )
def addControl( self, control ):
self.feilds.append( [ control.getName(), control ] )
@@ -133,7 +105,7 @@
check = True
- previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the
hash for quick reference
+ previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the
map for quick reference
if DEBUG: print previous_values
for (feild, control) in self.feilds:
@@ -258,12 +230,16 @@
def write_Control( self, request, persistedValue ):
request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'>"
% self.humanText )
+
+ #request.write( str( persistedValue ) )
+
+ #TODO: There's a problem with this: it doesn't persist an untick,
because the browsers don't pass it back. Need a fix...
if persistedValue == 'True':
request.write( "<input type='checkbox' name='%s' value='True'
checked>" % self.getName() )
else:
request.write( "<input type='checkbox' name='%s' value='True'>" %
self.getName() )
- request.write( "</select></td></tr>" )
+ request.write( "</td></tr>" )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/util.py
--- a/tools/python/xen/sv/util.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/util.py Thu Aug 25 22:53:20 2005
@@ -4,7 +4,7 @@
import types
-def getDomInfoHash( domain ):
+def getDomInfo( domain ):
domInfoHash = {}
try:
domInfoHash = sxp2hash( server.xend_domain( domain ) )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/SrvBase.py
--- a/tools/python/xen/web/SrvBase.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/SrvBase.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import types
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/SrvDir.py
--- a/tools/python/xen/web/SrvDir.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/SrvDir.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import types
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/__init__.py
--- a/tools/python/xen/web/__init__.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/__init__.py Thu Aug 25 22:53:20 2005
@@ -1,1 +1,17 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/connection.py
--- a/tools/python/xen/web/connection.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/connection.py Thu Aug 25 22:53:20 2005
@@ -1,7 +1,26 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import sys
import threading
import select
import socket
+import fcntl
from errno import EAGAIN, EINTR, EWOULDBLOCK
@@ -133,6 +152,9 @@
def createSocket(self):
raise NotImplementedError()
+
+ def setCloExec(self):
+ fcntl.fcntl(self.sock.fileno(), fcntl.F_SETFD, fcntl.FD_CLOEXEC)
def acceptConnection(self, sock, protocol, addr):
return SocketServerConnection(sock, protocol, addr, self)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/httpserver.py
--- a/tools/python/xen/web/httpserver.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/httpserver.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import threading
import string
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/protocol.py
--- a/tools/python/xen/web/protocol.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/protocol.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
class Factory:
"""Generic protocol factory.
"""
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/reactor.py
--- a/tools/python/xen/web/reactor.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/reactor.py Thu Aug 25 22:53:20 2005
@@ -1,2 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
from unix import listenUNIX, connectUNIX
-from tcp import listenTCP, connectTCP
+from tcp import listenTCP, connectTCP, SetCloExec
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/resource.py
--- a/tools/python/xen/web/resource.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/resource.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import http
def findResource(resource, request):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/static.py
--- a/tools/python/xen/web/static.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/static.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import os
from resource import Resource
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/tcp.py
--- a/tools/python/xen/web/tcp.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/tcp.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import sys
import socket
import types
@@ -68,6 +85,9 @@
l.startListening()
return l
+def SetCloExec(SocketListener):
+ SocketListener.setCloExec() # connection.py adds this method as setCloExec (lower-case 's')
+
def connectTCP(host, port, factory, timeout=None, bindAddress=None):
c = TCPConnector(host, port, factory, timeout=timeout,
bindAddress=bindAddress)
c.connect()
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/unix.py
--- a/tools/python/xen/web/unix.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/unix.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import sys
import socket
import os
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/Args.py
--- a/tools/python/xen/xend/Args.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/Args.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import types
import StringIO
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/EventServer.py
--- a/tools/python/xen/xend/EventServer.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/EventServer.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""Simple publish/subscribe event server.
"""
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/PrettyPrint.py
--- a/tools/python/xen/xend/PrettyPrint.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/PrettyPrint.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""General pretty-printer, including support for SXP.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/Vifctl.py
--- a/tools/python/xen/xend/Vifctl.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/Vifctl.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""Xend interface to networking control scripts.
"""
import os
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py Thu Aug 25 22:53:20 2005
@@ -6,6 +6,7 @@
import errno
import os
+import re
import select
import sxp
from string import join
@@ -64,6 +65,13 @@
if l.rstrip() == "suspend":
log.info("suspending %d" % dominfo.id)
xd.domain_shutdown(dominfo.id, reason='suspend')
+ if dominfo.store_channel:
+ try:
+ dominfo.db.releaseDomain(dominfo.id)
+ except Exception, ex:
+ log.warning("error in domain release on xenstore: %s",
+ ex)
+ pass
dominfo.state_wait("suspended")
log.info("suspend %d done" % dominfo.id)
child.tochild.write("done\n")
@@ -76,6 +84,11 @@
if child.wait() != 0:
raise XendError("xc_save failed: %s" % lasterr)
+ if dominfo.store_channel:
+ dominfo.store_channel.close()
+ dominfo.db['store_channel'].delete()
+ dominfo.db.saveDB(save=True)
+ dominfo.store_channel = None
xd.domain_destroy(dominfo.id)
return None
@@ -107,8 +120,13 @@
raise XendError(
"not a valid guest state file: pfn count out of range")
+ if dominfo.store_channel:
+ evtchn = dominfo.store_channel.port2
+ else:
+ evtchn = 0
+
cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
- str(dominfo.id), str(nr_pfns)]
+ str(dominfo.id), str(nr_pfns), str(evtchn)]
log.info("[xc_restore] " + join(cmd))
child = xPopen3(cmd, True, -1, [fd, xc.handle()])
child.tochild.close()
@@ -128,7 +146,21 @@
lasterr = l.rstrip()
if fd == child.fromchild.fileno():
l = child.fromchild.readline()
- log.info(l.rstrip())
+ while l:
+ m = re.match(r"^(store-mfn) (\d+)\n$", l)
+ if m:
+ if dominfo.store_channel:
+ dominfo.store_mfn = int(m.group(2))
+ if dominfo.store_mfn >= 0:
+ dominfo.db.introduceDomain(dominfo.id,
+ dominfo.store_mfn,
+ dominfo.store_channel)
+ dominfo.exportToDB(save=True, sync=True)
+ log.info(l.rstrip())
+ try:
+ l = child.fromchild.readline()
+ except:
+ l = None
if filter(lambda (fd, event): event & select.POLLHUP, r):
break
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendClient.py
--- a/tools/python/xen/xend/XendClient.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendClient.py Thu Aug 25 22:53:20 2005
@@ -1,13 +1,27 @@
#!/usr/bin/env python
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""Client API for the HTTP interface on xend.
Callable as a script - see main().
Supports inet or unix connection to xend.
This API is the 'control-plane' for xend.
-The 'data-plane' is done separately. For example, consoles
-are accessed via sockets on xend, but the list of consoles
-is accessible via this API.
+The 'data-plane' is done separately.
"""
import os
import sys
@@ -145,9 +159,6 @@
def domainurl(self, id=''):
return self.url.relative('domain/' + str(id))
-
- def consoleurl(self, id=''):
- return self.url.relative('console/' + str(id))
def deviceurl(self, id=''):
return self.url.relative('device/' + str(id))
@@ -213,11 +224,15 @@
return self.xendPost(self.domainurl(id),
{'op' : 'pause' })
- def xend_domain_shutdown(self, id, reason, key=0):
+ def xend_domain_shutdown(self, id, reason):
return self.xendPost(self.domainurl(id),
{'op' : 'shutdown',
- 'reason' : reason,
- 'key' : key })
+ 'reason' : reason})
+
+ def xend_domain_sysrq(self, id, key):
+ return self.xendPost(self.domainurl(id),
+ {'op' : 'sysrq',
+ 'key' : key})
def xend_domain_destroy(self, id, reason):
return self.xendPost(self.domainurl(id),
@@ -317,16 +332,6 @@
{'op' : 'device_configure',
'idx' : idx,
'config' : fileof(config) })
-
- def xend_consoles(self):
- return self.xendGet(self.consoleurl())
-
- def xend_console(self, id):
- return self.xendGet(self.consoleurl(id))
-
- def xend_console_disconnect(self, id):
- return self.xendPost(self.consoleurl(id),
- {'op' : 'disconnect'})
def xend_vnets(self):
return self.xendGet(self.vneturl())
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendDB.py
--- a/tools/python/xen/xend/XendDB.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendDB.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import os
import os.path
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendDmesg.py
--- a/tools/python/xen/xend/XendDmesg.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendDmesg.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
- # Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""Get dmesg output for this node.
"""
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendDomain.py Thu Aug 25 22:53:20 2005
@@ -1,5 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
# Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
+#============================================================================
"""Handler for domain operations.
Nothing here is persistent (across reboots).
@@ -305,8 +320,7 @@
@param vmconfig: vm configuration
"""
config = sxp.child_value(vmconfig, 'config')
- uuid = sxp.child_value(vmconfig, 'uuid')
- dominfo = XendDomainInfo.restore(self.dbmap, config, uuid=uuid)
+ dominfo = XendDomainInfo.restore(self.dbmap, config)
return dominfo
def domain_restore(self, src, progress=False):
@@ -386,7 +400,7 @@
except Exception, ex:
raise XendError(str(ex))
- def domain_shutdown(self, id, reason='poweroff', key=0):
+ def domain_shutdown(self, id, reason='poweroff'):
"""Shutdown domain (nicely).
- poweroff: restart according to exit code and restart mode
- reboot: restart on exit
@@ -402,9 +416,16 @@
eserver.inject('xend.domain.shutdown', [dominfo.name, dominfo.id, reason])
if reason == 'halt':
reason = 'poweroff'
- val = dominfo.shutdown(reason, key=key)
- if not reason in ['suspend', 'sysrq']:
+ val = dominfo.shutdown(reason)
+ if not reason in ['suspend']:
self.domain_shutdowns()
+ return val
+
+ def domain_sysrq(self, id, key):
+ """Send a SysRq to a domain
+ """
+ dominfo = self.domain_lookup(id)
+ val = dominfo.send_sysrq(key)
return val
def domain_shutdowns(self):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""Representation of a single domain.
Includes support for domain construction, using
@@ -8,7 +23,7 @@
"""
-import string
+import string, re
import os
import time
import threading
@@ -21,8 +36,10 @@
from xen.xend.server import SrvDaemon; xend = SrvDaemon.instance()
from xen.xend.server import messages
from xen.xend.server.channel import EventChannel, channelFactory
+from xen.util.blkif import blkdev_name_to_number, expand_dev_name
from xen.xend import sxp
+from xen.xend import Blkctl
from xen.xend.PrettyPrint import prettyprintstring
from xen.xend.XendBootloader import bootloader
from xen.xend.XendLogging import log
@@ -52,15 +69,6 @@
DOMAIN_CRASH : "crash",
}
-"""Map shutdown reasons to the message type to use.
-"""
-shutdown_messages = {
- 'poweroff' : 'shutdown_poweroff_t',
- 'reboot' : 'shutdown_reboot_t',
- 'suspend' : 'shutdown_suspend_t',
- 'sysrq' : 'shutdown_sysrq_t',
- }
-
RESTART_ALWAYS = 'always'
RESTART_ONREBOOT = 'onreboot'
RESTART_NEVER = 'never'
@@ -132,7 +140,7 @@
if domlist and dom == domlist[0]['dom']:
return domlist[0]
return None
-
+
class XendDomainInfo:
"""Virtual machine object."""
@@ -152,8 +160,6 @@
vm = cls(db)
vm.construct(config)
vm.saveToDB(sync=True)
- # Flush info to xenstore immediately
- vm.exportToDB()
return vm
@@ -191,19 +197,22 @@
recreate = classmethod(recreate)
- def restore(cls, parentdb, config, uuid):
+ def restore(cls, parentdb, config, uuid=None):
"""Create a domain and a VM object to do a restore.
@param parentdb: parent db
@param config: domain configuration
@param uuid: uuid to use
"""
+ if not uuid:
+ uuid = getUuid()
db = parentdb.addChild(uuid)
vm = cls(db)
ssidref = int(sxp.child_value(config, 'ssidref'))
log.debug('restoring with ssidref='+str(ssidref))
id = xc.domain_create(ssidref = ssidref)
vm.setdom(id)
+ vm.clear_shutdown()
try:
vm.restore = True
vm.construct(config)
@@ -227,6 +236,7 @@
DBVar('restart_time', ty='float'),
DBVar('restart_count', ty='int'),
DBVar('target', ty='long', path="memory/target"),
+ DBVar('device_model_pid', ty='int'),
]
def __init__(self, db):
@@ -255,6 +265,8 @@
self.info = None
self.blkif_backend = False
self.netif_backend = False
+ self.netif_idx = 0
+
#todo: state: running, suspended
self.state = STATE_VM_OK
self.state_updated = threading.Condition()
@@ -268,9 +280,10 @@
self.restart_time = None
self.restart_count = 0
- self.console_port = None
self.vcpus = 1
+ self.vcpusdb = {}
self.bootloader = None
+ self.device_model_pid = 0
def setDB(self, db):
self.db = db
@@ -344,9 +357,6 @@
s += " name=" + self.name
s += " memory=" + str(self.memory)
s += " ssidref=" + str(self.ssidref)
- console = self.getConsole()
- if console:
- s += " console=" + str(console.console_port)
s += ">"
return s
@@ -374,6 +384,71 @@
return ctrl
def createDevice(self, type, devconfig, change=False):
+ if type == 'vbd':
+ typedev = sxp.child_value(devconfig, 'dev')
+ if re.match('^ioemu:', typedev):
+ return;
+ backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
+
+ devnum = blkdev_name_to_number(sxp.child_value(devconfig, 'dev'))
+
+ # create backend db
+ backdb = backdom.db.addChild("/backend/%s/%s/%d" %
+ (type, self.uuid, devnum))
+
+ # create frontend db
+ db = self.db.addChild("/device/%s/%d" % (type, devnum))
+
+ db['virtual-device'] = "%i" % devnum
+ #db['backend'] = sxp.child_value(devconfig, 'backend', '0')
+ db['backend'] = backdb.getPath()
+ db['backend-id'] = "%i" % backdom.id
+
+ backdb['frontend'] = db.getPath()
+ (type, params) = string.split(sxp.child_value(devconfig, 'uname'), ':', 1)
+ node = Blkctl.block('bind', type, params)
+ backdb['frontend-id'] = "%i" % self.id
+ backdb['physical-device'] = "%li" % blkdev_name_to_number(node)
+ backdb.saveDB(save=True)
+
+ # Ok, super gross, this really doesn't belong in the frontend db...
+ db['type'] = type
+ db['node'] = node
+ db['params'] = params
+ db.saveDB(save=True)
+
+ return
+
+ if type == 'vif':
+ backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
+
+ log.error(devconfig)
+
+ devnum = self.netif_idx
+ self.netif_idx += 1
+
+ # create backend db
+ backdb = backdom.db.addChild("/backend/%s/%s/%d" %
+ (type, self.uuid, devnum))
+
+ # create frontend db
+ db = self.db.addChild("/device/%s/%d" % (type, devnum))
+
+ backdb['frontend'] = db.getPath()
+ backdb['frontend-id'] = "%i" % self.id
+ backdb['handle'] = "%i" % devnum
+ backdb.saveDB(save=True)
+
+ db['backend'] = backdb.getPath()
+ db['backend-id'] = "%i" % backdom.id
+ db['handle'] = "%i" % devnum
+ log.error(sxp.child_value(devconfig, 'mac'))
+ db['mac'] = sxp.child_value(devconfig, 'mac')
+
+ db.saveDB(save=True)
+
+ return
+
ctrl = self.findDeviceController(type)
return ctrl.createDevice(devconfig, recreate=self.recreate,
change=change)
@@ -443,9 +518,6 @@
sxpr.append(self.store_channel.sxpr())
if self.store_mfn:
sxpr.append(['store_mfn', self.store_mfn])
- console = self.getConsole()
- if console:
- sxpr.append(console.sxpr())
if self.restart_count:
sxpr.append(['restart_count', self.restart_count])
@@ -459,6 +531,8 @@
sxpr.append(devs)
if self.config:
sxpr.append(['config', self.config])
+ if self.device_model_pid:
+ sxpr.append(['device_model_pid',self.device_model_pid])
return sxpr
def sxpr_devices(self):
@@ -519,7 +593,6 @@
# Create domain devices.
self.configure_backends()
- self.configure_console()
self.configure_restart()
self.construct_image()
self.configure()
@@ -558,6 +631,16 @@
except:
raise VmError('invalid vcpus value')
+ def exportVCPUSToDB(self, vcpus):
+ for v in range(0,vcpus):
+ path = "/cpu/%d"%(v)
+ if not self.vcpusdb.has_key(path):
+ self.vcpusdb[path] = self.db.addChild(path)
+ db = self.vcpusdb[path]
+ log.debug("writing key availability=online to path %s in store"%(path))
+ db['availability'] = "online"
+ db.saveDB(save=True)
+
def init_image(self):
"""Create boot image handler for the domain.
"""
@@ -572,15 +655,17 @@
self.create_channel()
self.image.createImage()
self.exportToDB()
- if self.store_channel:
+ if self.store_channel and self.store_mfn >= 0:
self.db.introduceDomain(self.id,
self.store_mfn,
self.store_channel)
+ # get the configured value of vcpus and update store
+ self.exportVCPUSToDB(self.vcpus)
def delete(self):
"""Delete the vm's db.
"""
- if self.dom_get(self.id):
+ if dom_get(self.id):
return
self.id = None
self.saveToDB(sync=True)
@@ -629,6 +714,7 @@
pass
if self.image:
try:
+ self.device_model_pid = 0
self.image.destroy()
self.image = None
except:
@@ -654,6 +740,21 @@
for ctrl in self.getDeviceControllers():
if ctrl.isDestroyed(): continue
ctrl.destroyController(reboot=reboot)
+ ddb = self.db.addChild("/device")
+ for type in ddb.keys():
+ if type == 'vbd':
+ typedb = ddb.addChild(type)
+ for dev in typedb.keys():
+ devdb = typedb.addChild(str(dev))
+ Blkctl.block('unbind', devdb['type'].getData(),
+ devdb['node'].getData())
+ typedb[dev].delete()
+ typedb.saveDB(save=True)
+ if type == 'vif':
+ typedb = ddb.addChild(type)
+ for dev in typedb.keys():
+ typedb[dev].delete()
+ typedb.saveDB(save=True)
def show(self):
"""Print virtual machine info.
@@ -730,7 +831,8 @@
ctrl.initController(reboot=True)
else:
self.create_configured_devices()
- self.image.createDeviceModel()
+ if not self.device_model_pid:
+ self.device_model_pid = self.image.createDeviceModel()
def device_create(self, dev_config):
"""Create a new device.
@@ -738,7 +840,7 @@
@param dev_config: device configuration
"""
dev_type = sxp.name(dev_config)
- dev = self.createDevice(self, dev_config, change=True)
+ dev = self.createDevice(dev_type, dev_config, change=True)
self.config.append(['device', dev.getConfig()])
return dev.sxpr()
@@ -785,17 +887,6 @@
"""
self.bootloader = sxp.child_value(self.config, "bootloader")
- def configure_console(self):
- """Configure the vm console port.
- """
- x = sxp.child_value(self.config, 'console')
- if x:
- try:
- port = int(x)
- except:
- raise VmError('invalid console:' + str(x))
- self.console_port = port
-
def configure_restart(self):
"""Configure the vm restart mode.
"""
@@ -855,7 +946,7 @@
def restart(self):
"""Restart the domain after it has exited.
- Reuses the domain id and console port.
+ Reuses the domain id
"""
try:
@@ -910,24 +1001,8 @@
"""
self.configure_fields()
- self.create_console()
self.create_devices()
self.create_blkif()
-
- def create_console(self):
- console = self.getConsole()
- if not console:
- config = ['console']
- if self.console_port:
- config.append(['console_port', self.console_port])
- console = self.createDevice('console', config)
- return console
-
- def getConsole(self):
- console_ctrl = self.getDeviceController("console", error=False)
- if console_ctrl:
- return console_ctrl.getDevice(0)
- return None
def create_blkif(self):
"""Create the block device interface (blkif) for the vm.
@@ -935,6 +1010,7 @@
at creation time, for example when it uses NFS root.
"""
+ return
blkif = self.getDeviceController("vbd", error=False)
if not blkif:
blkif = self.createDeviceController("vbd")
@@ -967,28 +1043,39 @@
def vcpu_hotplug(self, vcpu, state):
"""Disable or enable VCPU in domain.
"""
- log.error("Holly Shit! %d %d\n" % (vcpu, state))
- if self.channel:
+ db = ""
+ try:
+ db = self.vcpusdb['/cpu/%d'%(vcpu)]
+ except:
+ log.error("Invalid VCPU")
+ return
+
+ if self.store_channel:
if int(state) == 0:
- msg = messages.packMsg('vcpu_hotplug_off_t', { 'vcpu' : vcpu} )
+ db['availability'] = "offline"
else:
- msg = messages.packMsg('vcpu_hotplug_on_t', { 'vcpu' : vcpu} )
-
- self.channel.writeRequest(msg)
-
- def shutdown(self, reason, key=0):
- msgtype = shutdown_messages.get(reason)
- if not msgtype:
+ db['availability'] = "online"
+
+ db.saveDB(save=True)
+
+ def shutdown(self, reason):
+ if not reason in shutdown_reasons.values():
raise XendError('invalid reason:' + reason)
- extra = {}
- if reason == 'sysrq':
- extra['key'] = key
- if self.channel:
- msg = messages.packMsg(msgtype, extra)
- self.channel.writeRequest(msg)
- if not reason in ['suspend', 'sysrq']:
- self.shutdown_pending = {'start':time.time(), 'reason':reason,
- 'key':key}
+ db = self.db.addChild("/control");
+ db['shutdown'] = reason;
+ db.saveDB(save=True);
+ if not reason in ['suspend']:
+ self.shutdown_pending = {'start':time.time(), 'reason':reason}
+
+ def clear_shutdown(self):
+ db = self.db.addChild("/control")
+ db['shutdown'] = ""
+ db.saveDB(save=True)
+
+ def send_sysrq(self, key=0):
+ db = self.db.addChild("/control");
+ db['sysrq'] = '%c' % key;
+ db.saveDB(save=True);
def shutdown_time_left(self, timeout):
if not self.shutdown_pending:
@@ -1003,6 +1090,8 @@
self.db.introduceDomain(self.id, self.store_mfn,
self.store_channel)
self.exportToDB(save=True, sync=True)
+ # get run-time value of vcpus and update store
+ self.exportVCPUSToDB(dom_get(self.id)['vcpus'])
def vm_field_ignore(vm, config, val, index):
"""Dummy config field handler used for fields with built-in handling.
@@ -1048,7 +1137,6 @@
add_config_handler('ssidref', vm_field_ignore)
add_config_handler('cpu', vm_field_ignore)
add_config_handler('cpu_weight', vm_field_ignore)
-add_config_handler('console', vm_field_ignore)
add_config_handler('restart', vm_field_ignore)
add_config_handler('image', vm_field_ignore)
add_config_handler('device', vm_field_ignore)
@@ -1062,9 +1150,6 @@
#============================================================================
# Register device controllers and their device config types.
-from server import console
-controller.addDevControllerClass("console", console.ConsoleController)
-
from server import blkif
controller.addDevControllerClass("vbd", blkif.BlkifController)
add_device_handler("vbd", "vbd")
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendError.py
--- a/tools/python/xen/xend/XendError.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendError.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
class XendError(ValueError):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendLogging.py
--- a/tools/python/xen/xend/XendLogging.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendLogging.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import types
import logging
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendNode.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""Handler for node operations.
Has some persistent state:
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendProtocol.py
--- a/tools/python/xen/xend/XendProtocol.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendProtocol.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import socket
import httplib
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendRoot.py
--- a/tools/python/xen/xend/XendRoot.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendRoot.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""Xend root class.
Creates the event server and handles configuration.
@@ -69,12 +84,6 @@
"""Default path the unix-domain server listens at."""
xend_unix_path_default = '/var/lib/xend/xend-socket'
-
- """Default interface address xend listens at for consoles."""
- console_address_default = 'localhost'
-
- """Default port xend serves consoles at. """
- console_port_base_default = '9600'
dom0_min_mem_default = '0'
@@ -302,24 +311,11 @@
"""
return self.get_config_value("xend-unix-path",
self.xend_unix_path_default)
- def get_console_address(self):
- """Get the address xend listens at for its console ports.
- This defaults to 'localhost', allowing only the localhost to connect
- to the console ports. Setting this to the empty string, allows all
- hosts to connect.
- """
- return self.get_config_value('console-address', self.console_address_default)
-
- def get_console_port_base(self):
- """Get the base port number used to generate console ports for domains.
- """
- return self.get_config_int('console-port-base', self.console_port_base_default)
-
def get_block_script(self, type):
return self.get_config_value('block-%s' % type, '')
def get_network_script(self):
- return self.get_config_value('network-script', 'network')
+ return self.get_config_value('network-script', '')
def get_enable_dump(self):
return self.get_config_bool('enable-dump', 'no')
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendVnet.py
--- a/tools/python/xen/xend/XendVnet.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendVnet.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""Handler for vnet operations.
"""
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/encode.py
--- a/tools/python/xen/xend/encode.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/encode.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""Encoding for arguments to HTTP calls.
Uses the url-encoding with MIME type 'application/x-www-form-urlencoded'
if the data does not include files. Otherwise it uses the encoding with
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/image.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,22 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import os, string
+import re
import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
from xen.xend import sxp
@@ -245,7 +263,7 @@
memmap = None
memmap_value = []
device_channel = None
-
+ pid = 0
def createImage(self):
"""Create a VM for the VMX environment.
"""
@@ -257,14 +275,24 @@
# Create an event channel
self.device_channel = channel.eventChannel(0, self.vm.getDomain())
log.info("VMX device model port: %d", self.device_channel.port2)
- return xc.vmx_build(dom = self.vm.getDomain(),
+ if self.vm.store_channel:
+ store_evtchn = self.vm.store_channel.port2
+ else:
+ store_evtchn = 0
+ ret = xc.vmx_build(dom = self.vm.getDomain(),
image = self.kernel,
control_evtchn = self.device_channel.port2,
+ store_evtchn = store_evtchn,
memsize = self.vm.memory,
memmap = self.memmap_value,
cmdline = self.cmdline,
ramdisk = self.ramdisk,
- flags = self.flags)
+ flags = self.flags,
+ vcpus = self.vm.vcpus)
+ if isinstance(ret, dict):
+ self.vm.store_mfn = ret.get('store_mfn')
+ return 0
+ return ret
def parseMemmap(self):
self.memmap = sxp.child_value(self.vm.config, "memmap")
@@ -278,7 +306,7 @@
# xm config file
def parseDeviceModelArgs(self):
dmargs = [ 'cdrom', 'boot', 'fda', 'fdb',
- 'localtime', 'serial', 'macaddr', 'stdvga', 'isa' ]
+ 'localtime', 'serial', 'stdvga', 'isa' ]
ret = []
for a in dmargs:
v = sxp.child_value(self.vm.config, a)
@@ -295,20 +323,32 @@
ret.append("-%s" % a)
ret.append("%s" % v)
- # Handle hd img related options
+ # Handle disk/network related options
devices = sxp.children(self.vm.config, 'device')
for device in devices:
- vbdinfo = sxp.child(device, 'vbd')
- if not vbdinfo:
- raise VmError("vmx: missing vbd configuration")
- uname = sxp.child_value(vbdinfo, 'uname')
- vbddev = sxp.child_value(vbdinfo, 'dev')
- (vbdtype, vbdparam) = string.split(uname, ':', 1)
- vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
- if vbdtype != 'file' or vbddev not in vbddev_list:
- raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
- ret.append("-%s" % vbddev)
- ret.append("%s" % vbdparam)
+ name = sxp.name(sxp.child0(device))
+ if name == 'vbd':
+ vbdinfo = sxp.child(device, 'vbd')
+ uname = sxp.child_value(vbdinfo, 'uname')
+ typedev = sxp.child_value(vbdinfo, 'dev')
+ (vbdtype, vbdparam) = string.split(uname, ':', 1)
+ if re.match('^ioemu:', typedev):
+ (emtype, vbddev) = string.split(typedev, ':', 1)
+ else:
+ emtype = 'vbd'
+ vbddev = typedev
+ if emtype != 'ioemu':
+ continue;
+ vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
+ if vbddev not in vbddev_list:
+ raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
+ ret.append("-%s" % vbddev)
+ ret.append("%s" % vbdparam)
+ if name == 'vif':
+ vifinfo = sxp.child(device, 'vif')
+ mac = sxp.child_value(vifinfo, 'mac')
+ ret.append("-macaddr")
+ ret.append("%s" % mac)
# Handle graphics library related options
vnc = sxp.child_value(self.vm.config, 'vnc')
@@ -347,6 +387,7 @@
log.info("spawning device models: %s %s", device_model, args)
self.pid = os.spawnve(os.P_NOWAIT, device_model, args, env)
log.info("device model pid: %d", self.pid)
+ return self.pid
def vncParams(self):
# see if a vncviewer was specified
@@ -366,11 +407,16 @@
def destroy(self):
channel.eventChannelClose(self.device_channel)
import signal
+ if not self.pid:
+ self.pid = self.vm.device_model_pid
os.kill(self.pid, signal.SIGKILL)
(pid, status) = os.waitpid(self.pid, 0)
+ self.pid = 0
def getDomainMemory(self, mem_mb):
- return (mem_mb * 1024) + self.getPageTableSize(mem_mb)
+ # for ioreq_t and xenstore
+ static_pages = 2
+ return (mem_mb * 1024) + self.getPageTableSize(mem_mb) + 4 * static_pages
def getPageTableSize(self, mem_mb):
"""Return the size of memory needed for 1:1 page tables for physical
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/scheduler.py
--- a/tools/python/xen/xend/scheduler.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/scheduler.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import threading
def later(delay, fn, args=(), kwargs={}):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvDaemon.py
--- a/tools/python/xen/xend/server/SrvDaemon.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvDaemon.py Thu Aug 25 22:53:20 2005
@@ -126,12 +126,8 @@
def cleanup_xend(self, kill=False):
return self.cleanup_process(XEND_PID_FILE, "xend", kill)
- def cleanup_xenstored(self, kill=False):
- return self.cleanup_process(XENSTORED_PID_FILE, "xenstored", kill)
-
def cleanup(self, kill=False):
self.cleanup_xend(kill=kill)
- #self.cleanup_xenstored(kill=kill)
def status(self):
"""Returns the status of the xend daemon.
@@ -167,31 +163,6 @@
pidfile.write(str(pid))
pidfile.close()
return pid
-
- def start_xenstored(self):
- """Fork and exec xenstored, writing its pid to XENSTORED_PID_FILE.
- """
- def mkdirs(p):
- try:
- os.makedirs(p)
- except:
- pass
- mkdirs(XENSTORED_RUN_DIR)
- mkdirs(XENSTORED_LIB_DIR)
-
- pid = self.fork_pid(XENSTORED_PID_FILE)
- if pid:
- # Parent
- log.info("Started xenstored, pid=%d", pid)
- else:
- # Child
- if XEND_DAEMONIZE:
- self.daemonize()
- if XENSTORED_DEBUG:
- os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork",
- "-T", "/var/log/xenstored-trace.log")
- else:
- os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork")
def daemonize(self):
if not XEND_DAEMONIZE: return
@@ -223,14 +194,10 @@
4 Insufficient privileges
"""
xend_pid = self.cleanup_xend()
- xenstored_pid = self.cleanup_xenstored()
if self.set_user():
return 4
os.chdir("/")
-
- if xenstored_pid == 0:
- self.start_xenstored()
if xend_pid > 0:
# Trying to run an already-running service is a success.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvDmesg.py
--- a/tools/python/xen/xend/server/SrvDmesg.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvDmesg.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import os
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvDomain.py Thu Aug 25 22:53:20 2005
@@ -1,10 +1,24 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
from xen.web import http
from xen.xend import sxp
from xen.xend import XendDomain
-from xen.xend import XendConsole
from xen.xend import PrettyPrint
from xen.xend.Args import FormFn
@@ -18,7 +32,6 @@
SrvDir.__init__(self)
self.dom = dom
self.xd = XendDomain.instance()
- self.xconsole = XendConsole.instance()
def op_configure(self, op, req):
"""Configure an existing domain.
@@ -41,9 +54,17 @@
def op_shutdown(self, op, req):
fn = FormFn(self.xd.domain_shutdown,
[['dom', 'int'],
- ['reason', 'str'],
+ ['reason', 'str']])
+ val = fn(req.args, {'dom': self.dom.id})
+ req.setResponseCode(http.ACCEPTED)
+ req.setHeader("Location", "%s/.." % req.prePathURL())
+ return val
+
+ def op_sysrq(self, op, req):
+ fn = FormFn(self.xd.domain_sysrq,
+ [['dom', 'int'],
['key', 'int']])
- val = fn(req.args, {'dom': self.dom.id})
+ val = fn(req.args, {'dom' : self.dom.id})
req.setResponseCode(http.ACCEPTED)
req.setHeader("Location", "%s/.." % req.prePathURL())
return val
@@ -208,14 +229,6 @@
self.print_path(req)
#self.ls()
req.write('<p>%s</p>' % self.dom)
- if self.dom.console:
- cinfo = self.dom.console
- cid = str(cinfo.console_port)
- #todo: Local xref: need to know server prefix.
- req.write('<p><a href="/xend/console/%s">Console %s</a></p>'
- % (cid, cid))
- req.write('<p><a href="%s">Connect to console</a></p>'
- % cinfo.uri())
if self.dom.config:
req.write("<code><pre>")
PrettyPrint.prettyprint(self.dom.config, out=req)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvDomainDir.py
--- a/tools/python/xen/xend/server/SrvDomainDir.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvDomainDir.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import traceback
from StringIO import StringIO
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvNode.py
--- a/tools/python/xen/xend/server/SrvNode.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvNode.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import os
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvRoot.py
--- a/tools/python/xen/xend/server/SrvRoot.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvRoot.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
from xen.xend import XendRoot
xroot = XendRoot.instance()
@@ -15,7 +30,6 @@
subdirs = [
('node', 'SrvNode' ),
('domain', 'SrvDomainDir' ),
- ('console', 'SrvConsoleDir' ),
('vnet', 'SrvVnetDir' ),
]
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvServer.py Thu Aug 25 22:53:20 2005
@@ -1,7 +1,22 @@
#!/usr/bin/python
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
-"""Example xend HTTP and console server.
+"""Example xend HTTP
Can be accessed from a browser or from a program.
Do 'python SrvServer.py' to run the server.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvVnetDir.py
--- a/tools/python/xen/xend/server/SrvVnetDir.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvVnetDir.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
from xen.xend import sxp
from xen.xend.Args import FormFn
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvXendLog.py
--- a/tools/python/xen/xend/server/SrvXendLog.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvXendLog.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
from xen.web import static
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/blkif.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""Support for virtual block devices.
"""
import string
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/channel.py
--- a/tools/python/xen/xend/server/channel.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/channel.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import threading
import select
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/controller.py
--- a/tools/python/xen/xend/server/controller.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/controller.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""General support for controllers, which handle devices
for a domain.
"""
@@ -126,7 +142,7 @@
def createDevController(self, type, vm, recreate=False):
cls = self.getDevControllerClass(type)
if not cls:
- raise XendError("unknown device type: " + type)
+ raise XendError("unknown device type: " + str(type))
return cls.createDevController(vm, recreate=recreate)
def getDevControllerTable():
@@ -267,6 +283,8 @@
dev.attach(recreate=recreate, change=change)
dev.exportToDB()
+ return dev
+
def configureDevice(self, id, config, change=False):
"""Reconfigure an existing device.
May be defined in subclass."""
@@ -307,9 +325,9 @@
return self.destroyed
def getDevice(self, id, error=False):
- dev = self.devices.get(id)
+ dev = self.devices.get(int(id))
if error and not dev:
- raise XendError("invalid device id: " + id)
+ raise XendError("invalid device id: " + str(id))
return dev
def getDeviceIds(self):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/event.py
--- a/tools/python/xen/xend/server/event.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/event.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import sys
import StringIO
@@ -33,7 +50,7 @@
def dataReceived(self, data):
try:
self.parser.input(data)
- if self.parser.ready():
+ while(self.parser.ready()):
val = self.parser.get_val()
res = self.dispatch(val)
self.send_result(res)
@@ -128,16 +145,8 @@
def op_pretty(self, name, req):
self.pretty = 1
- def op_console_disconnect(self, name, req):
- id = sxp.child_value(req, 'id')
- if not id:
- raise XendError('Missing console id')
- id = int(id)
- self.daemon.console_disconnect(id)
-
def op_info(self, name, req):
val = ['info']
- #val += self.daemon.consoles()
#val += self.daemon.blkifs()
#val += self.daemon.netifs()
#val += self.daemon.usbifs()
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/messages.py
--- a/tools/python/xen/xend/server/messages.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/messages.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import sys
import struct
import types
@@ -16,16 +33,6 @@
See below.
"""
msg_formats = {}
-
-#============================================================================
-# Console message types.
-#============================================================================
-
-CMSG_CONSOLE = 0
-
-console_formats = { 'console_data': (CMSG_CONSOLE, 0) }
-
-msg_formats.update(console_formats)
#============================================================================
# Block interface message types.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/netif.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""Support for virtual network interfaces.
"""
@@ -405,7 +421,9 @@
'netif_handle' : self.vif,
'evtchn' : self.getEventChannelBackend(),
'tx_shmem_frame' : val['tx_shmem_frame'],
- 'rx_shmem_frame' : val['rx_shmem_frame'] })
+ 'tx_shmem_ref' : val['tx_shmem_ref'],
+ 'rx_shmem_frame' : val['rx_shmem_frame'],
+ 'rx_shmem_ref' : val['rx_shmem_ref'] })
msg = self.backendChannel.requestResponse(msg)
#todo: check return status
self.status = NETIF_INTERFACE_STATUS_CONNECTED
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/params.py
--- a/tools/python/xen/xend/server/params.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/params.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import os
def getenv(var, val, conv=None):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/pciif.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
import types
import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/relocate.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import socket
import sys
@@ -26,7 +42,7 @@
def dataReceived(self, data):
try:
self.parser.input(data)
- if self.parser.ready():
+ while(self.parser.ready()):
val = self.parser.get_val()
res = self.dispatch(val)
self.send_result(res)
@@ -124,7 +140,8 @@
if xroot.get_xend_relocation_server():
port = xroot.get_xend_relocation_port()
interface = xroot.get_xend_relocation_address()
- reactor.listenTCP(port, factory, interface=interface)
+ l = reactor.listenTCP(port, factory, interface=interface)
+ l.setCloExec()
def setupRelocation(dst, port):
try:
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/sxp.py
--- a/tools/python/xen/xend/sxp.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/sxp.py Thu Aug 25 22:53:20 2005
@@ -1,5 +1,21 @@
#!/usr/bin/python
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""
Input-driven parsing for s-expression (sxp) format.
Create a parser: pin = Parser();
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/uuid.py
--- a/tools/python/xen/xend/uuid.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/uuid.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""Universal(ly) Unique Identifiers (UUIDs).
"""
import commands
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/xenstore/__init__.py
--- a/tools/python/xen/xend/xenstore/__init__.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/xenstore/__init__.py Thu Aug 25 22:53:20 2005
@@ -1,2 +1,18 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
from xsnode import *
from xsobj import *
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/xenstore/xsnode.py
--- a/tools/python/xen/xend/xenstore/xsnode.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/xenstore/xsnode.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import errno
import os
import os.path
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/xenstore/xsobj.py
--- a/tools/python/xen/xend/xenstore/xsobj.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/xenstore/xsobj.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
import string
import types
@@ -307,24 +323,24 @@
db = self.__db__
if path is None:
path = db.relPath()
- print 'DBMap>introduceDomain>', dom, page, evtchn, path
+ log.info("DBMap>introduceDomain> %d %d %s %s" %(dom, page, evtchn, path))
try:
db.introduceDomain(dom, page, evtchn, path)
except Exception, ex:
import traceback
traceback.print_exc()
- print 'DBMap>introduceDomain>', ex
+ log.info("DBMap>introduceDomain> %s" %ex)
pass # todo: don't ignore
def releaseDomain(self, dom):
db = self.__db__
- print 'DBMap>releaseDomain>', dom
+ log.info("DBMap>releaseDomain> %d" %dom)
try:
db.releaseDomain(dom)
except Exception, ex:
import traceback
traceback.print_exc()
- print 'DBMap>releaseDomain>', ex
+ log.info("DBMap>releaseDomain> %s" %ex)
pass # todo: don't ignore
def watch(self, fn, path=""):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/xenstore/xsresource.py
--- a/tools/python/xen/xend/xenstore/xsresource.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/xenstore/xsresource.py Thu Aug 25 22:53:20 2005
@@ -1,3 +1,16 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
#============================================================================
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/create.py Thu Aug 25 22:53:20 2005
@@ -1,5 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
# Copyright (C) 2005 Nguyen Anh Quynh <aquynh@xxxxxxxxx>
+#============================================================================
"""Domain creation.
"""
@@ -7,6 +22,8 @@
import string
import sys
import socket
+import commands
+import time
import xen.lowlevel.xc
@@ -16,8 +33,6 @@
from xen.xend.XendBootloader import bootloader
from xen.xend import XendRoot; xroot = XendRoot.instance()
from xen.util import blkif
-
-from xen.util import console_client
from xen.xm.opts import *
@@ -144,10 +159,6 @@
fn=set_float, default=None,
use="""Set the new domain's cpu weight.
WEIGHT is a float that controls the domain's share of the cpu.""")
-
-gopts.var('console', val='PORT',
- fn=set_int, default=None,
- use="Console port to use. Default is 9600 + domain id.")
gopts.var('restart', val='onreboot|always|never',
fn=set_value, default=None,
@@ -370,7 +381,6 @@
@return: MAC address string
"""
- random.seed()
mac = [ 0xaa, 0x00, 0x00,
random.randint(0x00, 0x7f),
random.randint(0x00, 0xff),
@@ -471,8 +481,6 @@
config.append(['backend', ['netif']])
if vals.restart:
config.append(['restart', vals.restart])
- if vals.console:
- config.append(['console', vals.console])
if vals.bootloader:
run_bootloader(opts, config, vals)
@@ -584,9 +592,14 @@
return d
return None
+vncpid = None
+
def spawn_vnc(display):
- os.system("vncviewer -log *:stdout:0 -listen %d &" %
- (VNC_BASE_PORT + display))
+ vncargs = (["vncviewer", "-log", "*:stdout:0",
+ "-listen", "%d" % (VNC_BASE_PORT + display) ])
+ global vncpid
+ vncpid = os.spawnvp(os.P_NOWAIT, "vncviewer", vncargs)
+
return VNC_BASE_PORT + display
def preprocess_vnc(opts, vals):
@@ -620,8 +633,8 @@
@param opts: options
@param config: configuration
- @return: domain id, console port
- @rtype: (int, int)
+ @return: domain id
+ @rtype: int
"""
try:
@@ -631,22 +644,19 @@
else:
dominfo = server.xend_domain_create(config)
except XendError, ex:
+ import signal
+ if vncpid:
+ os.kill(vncpid, signal.SIGKILL)
opts.err(str(ex))
dom = sxp.child_value(dominfo, 'name')
- console_info = sxp.child(dominfo, 'console')
- if console_info:
- console_port = int(sxp.child_value(console_info, 'console_port'))
- else:
- console_port = None
if not opts.vals.paused:
if server.xend_domain_unpause(dom) < 0:
server.xend_domain_destroy(dom)
opts.err("Failed to unpause domain %s" % dom)
- opts.info("Started domain %s, console on port %d"
- % (dom, console_port))
- return (dom, console_port)
+ opts.info("Started domain %s" % (dom))
+ return int(sxp.child_value(dominfo, 'id'))
def get_dom0_alloc():
"""Return current allocation memory of dom0 (in MB). Return 0 on error"""
@@ -665,20 +675,38 @@
return 0
def balloon_out(dom0_min_mem, opts):
- """Balloon out to get memory for domU, if necessarily"""
+ """Balloon out memory from dom0 if necessary"""
SLACK = 4
+ timeout = 20 # 2s
+ ret = 0
xc = xen.lowlevel.xc.new()
pinfo = xc.physinfo()
- free_mem = pinfo['free_pages']/256
- if free_mem < opts.vals.memory + SLACK:
- need_mem = opts.vals.memory + SLACK - free_mem
- cur_alloc = get_dom0_alloc()
- if cur_alloc - need_mem >= dom0_min_mem:
- server.xend_domain_mem_target_set(0, cur_alloc - need_mem)
+ free_mem = pinfo['free_pages'] / 256
+ domU_need_mem = opts.vals.memory + SLACK
+
+ dom0_cur_alloc = get_dom0_alloc()
+ dom0_new_alloc = dom0_cur_alloc - (domU_need_mem - free_mem)
+
+ if free_mem < domU_need_mem and dom0_new_alloc < dom0_min_mem:
+ ret = 1
+ if free_mem < domU_need_mem and ret == 0:
+
+ server.xend_domain_mem_target_set(0, dom0_new_alloc)
+
+ while dom0_cur_alloc > dom0_new_alloc and timeout > 0:
+ time.sleep(0.1) # sleep 100ms
+ dom0_cur_alloc = get_dom0_alloc()
+ timeout -= 1
+
+ if dom0_cur_alloc > dom0_new_alloc:
+ ret = 1
+
del xc
+ return ret
def main(argv):
+ random.seed()
opts = gopts
args = opts.parse(argv)
if opts.vals.help:
@@ -707,12 +735,14 @@
else:
dom0_min_mem = xroot.get_dom0_min_mem()
if dom0_min_mem != 0:
- balloon_out(dom0_min_mem, opts)
-
- (dom, console) = make_domain(opts, config)
+ if balloon_out(dom0_min_mem, opts):
+ print >>sys.stderr, "error: cannot allocate enough memory for domain"
+ sys.exit(1)
+
+ dom = make_domain(opts, config)
if opts.vals.console_autoconnect:
- path = "/var/lib/xend/console-%s" % console
- console_client.connect('localhost', console, path=path)
+ cmd = "/usr/libexec/xen/xenconsole %d" % dom
+ os.execvp('/usr/libexec/xen/xenconsole', cmd.split())
if __name__ == '__main__':
main(sys.argv)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/destroy.py
--- a/tools/python/xen/xm/destroy.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/destroy.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""Destroy a domain.
"""
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/help.py
--- a/tools/python/xen/xm/help.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/help.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""Variable definition and help support for Python defconfig files.
"""
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/main.py Thu Aug 25 22:53:20 2005
@@ -1,27 +1,124 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+# (C) Copyright IBM Corp. 2005
+# Copyright (C) 2004 Mike Wray
+#
+# Authors:
+# Sean Dague <sean at dague dot net>
+# Mike Wray <mike dot wray at hp dot com>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
"""Grand unified management application for Xen.
"""
import os
import os.path
import sys
+import commands
+import re
from getopt import getopt
import socket
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
-
from xen.xend import PrettyPrint
from xen.xend import sxp
-# this is a nasty place to stick this in, but required because
-# log file access is set up via a 5 deep import chain. This
-# ensures the user sees a useful message instead of a stack trace
-if os.getuid() != 0:
- print "xm requires root access to execute, please try again as root"
- sys.exit(1)
-
-from xen.xend.XendClient import XendError, server
-from xen.xend.XendClient import main as xend_client_main
-from xen.xm import create, destroy, migrate, shutdown, sysrq
from xen.xm.opts import *
+shorthelp = """Usage: xm <subcommand> [args]
+ Control, list, and manipulate Xen guest instances
+
+xm common subcommands:
+ console <DomId> attach to console of DomId
+ create <CfgFile> create a domain based on Config File
+ destroy <DomId> terminate a domain immediately
+ help display this message
+ list [DomId, ...] list information about domains
+ mem-max <DomId> <Mem> set the maximum memory reservation for a domain
+ mem-set <DomId> <Mem> adjust the current memory usage for a domain
+ migrate <DomId> <Host> migrate a domain to another machine
+ pause <DomId> pause execution of a domain
+ reboot <DomId> reboot a domain
+ restore <File> create a domain from a saved state file
+ save <DomId> <File> save domain state (and config) to file
+ shutdown <DomId> shutdown a domain
+ top monitor system and domains in real-time
+ unpause <DomId> unpause a paused domain
+
+For a complete list of subcommands run 'xm help --long'
+For more help on xm see the xm(1) man page
+For more help on xm create, see the xmdomain.cfg(5) man page"""
+
+longhelp = """Usage: xm <subcommand> [args]
+ Control, list, and manipulate Xen guest instances
+
+xm full list of subcommands:
+
+ Domain Commands:
+ console <DomId> attach to console of DomId
+ cpus-list <DomId> <VCpu> get the list of cpus for a VCPU
+ cpus-set <DomId> <VCpu> <CPUS> set which cpus a VCPU can use.
+ create <ConfigFile> create a domain
+ destroy <DomId> terminate a domain immediately
+ domid <DomName> convert a domain name to a domain id
+ domname <DomId> convert a domain id to a domain name
+ list list information about domains
+ mem-max <DomId> <Mem> set domain maximum memory limit
+ mem-set <DomId> <Mem> set the domain's memory dynamically
+ migrate <DomId> <Host> migrate a domain to another machine
+ pause <DomId> pause execution of a domain
+ reboot [-w|-a] <DomId> reboot a domain
+ restore <File> create a domain from a saved state file
+ save <DomId> <File> save domain state (and config) to file
+ shutdown [-w|-a] <DomId> shutdown a domain
+ sysrq <DomId> <letter> send a sysrq to a domain
+ unpause <DomId> unpause a paused domain
+ vcpu-enable <DomId> <VCPU> enable VCPU in a domain
+ vcpu-disable <DomId> <VCPU> disable VCPU in a domain
+ vcpu-list <DomId> get the list of VCPUs for a domain
+
+ Xen Host Commands:
+ dmesg [--clear] read or clear Xen's message buffer
+ info get information about the xen host
+ log print the xend log
+ top monitor system and domains in real-time
+
+ Scheduler Commands:
+ bvt <options> set BVT scheduler parameters
+ bvt_ctxallow <Allow> set the BVT scheduler context switch allowance
+ sedf <options> set simple EDF parameters
+
+ Virtual Device Commands:
+ block-create <DomId> <BackDev> <FrontDev> <Mode> [BackDomId]
+ Create a new virtual block device
+ block-destroy <DomId> <DevId> Destroy a domain's virtual block device
+ block-list <DomId> List virtual block devices for a domain
+ block-refresh <DomId> <DevId> Refresh a virtual block device for a domain
+ network-limit <DomId> <Vif> <Credit> <Period>
+ Limit the transmission rate of a virtual network interface
+ network-list <DomId> List virtual network interfaces for a domain
+
+For a short list of subcommands run 'xm help'
+For more help on xm see the xm(1) man page
+For more help on xm create, see the xmdomain.cfg(5) man page"""
+
+####################################################################
+#
+# Utility functions
+#
+####################################################################
+
+def arg_check(args,num,name):
+ if len(args) < num:
+ err("'xm %s' requires %s argument(s)!\n" % (name, num))
+ usage(name)
def unit(c):
if not c.isalpha():
@@ -48,724 +145,325 @@
else:
return value * (base / dst_base)
-class Group:
-
- name = ""
- info = ""
-
- def __init__(self, xm):
- self.xm = xm
- self.progs = {}
-
- def addprog(self, prog):
- self.progs[prog.name] = prog
-
- def getprog(self, name):
- return self.progs.get(name)
-
- def proglist(self):
- kl = self.progs.keys()
- kl.sort()
- return [ self.getprog(k) for k in kl ]
-
- def help(self, args):
- if self.info:
- print
- print self.info
- print
- else:
- print
-
- def shortHelp(self, args):
- self.help(args)
- for p in self.proglist():
- p.shortHelp(args)
-
-class Prog:
- """Base class for sub-programs.
- """
-
- """Program group it belongs to"""
- group = 'all'
- """Program name."""
- name = '??'
- """Short program info."""
- info = ''
-
- def __init__(self, xm):
- self.xm = xm
-
- def err(self, msg):
- self.xm.err(msg)
-
- def help(self, args):
- self.shortHelp(args)
-
- def shortHelp(self, args):
- print "%-14s %s" % (self.name, self.info)
-
- def main(self, args):
- """Program main entry point.
- """
- pass
-
-
-class ProgUnknown(Prog):
-
- name = 'unknown'
- info = ''
-
- def help(self, args):
- self.xm.err("Unknown command: %s\nTry '%s help' for more information."
- % (args[0], self.xm.name))
-
- main = help
-
-class Xm:
- """Main application.
- """
-
- def __init__(self):
- self.name = 'xm'
- self.unknown = ProgUnknown(self)
- self.progs = {}
- self.groups = {}
-
- def err(self, msg):
- print >>sys.stderr, "Error:", msg
+def err(msg):
+ print >>sys.stderr, "Error:", msg
+
+def handle_xend_error(cmd, dom, ex):
+ error = str(ex)
+ if error == "Not found" and dom != None:
+ err("Domain '%s' not found when running 'xm %s'" % (dom, cmd))
sys.exit(1)
-
- def main(self, args):
- try:
- self.main_call(args)
- except socket.error, ex:
- print >>sys.stderr, ex
- self.err("Error connecting to xend, is xend running?")
- except XendError, ex:
- self.err(str(ex))
-
- def main_call(self, args):
- """Main entry point. Dispatches to the progs.
- """
- self.name = args[0]
- if len(args) < 2:
- args.append('help')
- help = self.helparg(args)
- p = self.getprog(args[1], self.unknown)
- if help or len(args) < 2:
- p.help(args[1:])
- else:
- p.main(args[1:])
-
- def helparg(self, args):
- for a in args:
- if a in ['-h', '--help']:
- return 1
- return 0
-
- def prog(self, pklass):
- """Add a sub-program.
-
- pklass program class (Prog subclass)
- """
- p = pklass(self)
- self.progs[p.name] = p
- self.getgroup(p.group).addprog(p)
- return p
-
- def getprog(self, name, val=None):
- """Get a sub-program.
- name Name of the sub-program (or optionally, an unambiguous
- prefix of its name)
- val Default return value if no (unique) match is found
- """
-
- match = None
- for progname in self.progs.keys():
- if progname == name:
- match = progname
- break
- if progname.startswith(name):
- if not match:
- match = progname
- else:
- return val # name is ambiguous - bail out
-
- return self.progs.get(match, val)
-
- def group(self, klass):
- g = klass(self)
- self.groups[g.name] = g
- return g
-
- def getgroup(self, name):
- return self.groups[name]
-
- def grouplist(self):
- kl = self.groups.keys()
- kl.sort()
- return [ self.getgroup(k) for k in kl ]
-
-# Create the application object, then add the sub-program classes.
-xm = Xm()
-
-class GroupAll(Group):
-
- name = "all"
- info = ""
-
-xm.group(GroupAll)
-
-class GroupDomain(Group):
-
- name = "domain"
- info = "Commands on domains:"
-
-xm.group(GroupDomain)
-
-class GroupScheduler(Group):
-
- name = "scheduler"
- info = "Comands controlling scheduling:"
-
-xm.group(GroupScheduler)
-
-class GroupHost(Group):
-
- name = "host"
- info = "Commands related to the xen host (node):"
-
-xm.group(GroupHost)
-
-class GroupConsole(Group):
-
- name = "console"
- info = "Commands related to consoles:"
-
-xm.group(GroupConsole)
-
-class GroupVbd(Group):
-
- name = "vbd"
- info = "Commands related to virtual block devices:"
-
-xm.group(GroupVbd)
-
-class GroupVif(Group):
-
- name = "vif"
- info = "Commands related to virtual network interfaces:"
-
-xm.group(GroupVif)
-
-class ProgHelp(Prog):
-
- name = "help"
- info = "Print help."
-
- def help(self, args):
- if len(args) == 2:
- name = args[1]
- p = self.xm.getprog(name)
- if p:
- p.help(args[1:])
- else:
- print '%s: Unknown command: %s' % (self.name, name)
- else:
- for g in self.xm.grouplist():
- g.shortHelp(args)
- print "\nTry '%s help CMD' for help on CMD" % self.xm.name
-
- main = help
-
-xm.prog(ProgHelp)
-
-class ProgCreate(Prog):
-
- group = 'domain'
- name = "create"
- info = """Create a domain."""
-
- def help(self, args):
- create.main([args[0], '-h'])
-
- def main(self, args):
- create.main(args)
-
-xm.prog(ProgCreate)
-
-class ProgSave(Prog):
- group = 'domain'
- name = "save"
- info = """Save domain state (and config) to file."""
-
- def help(self, args):
- print args[0], "DOM FILE"
- print """\nSave domain with id DOM to FILE."""
-
- def main(self, args):
- if len(args) < 3: self.err("%s: Missing arguments" % args[0])
- dom = args[1]
- savefile = os.path.abspath(args[2])
- server.xend_domain_save(dom, savefile)
-
-xm.prog(ProgSave)
-
-class ProgRestore(Prog):
- group = 'domain'
- name = "restore"
- info = """Create a domain from a saved state."""
-
- def help(self, args):
- print args[0], "FILE"
- print "\nRestore a domain from FILE."
-
- def main(self, args):
- if len(args) < 2: self.err("%s: Missing arguments" % args[0])
- savefile = os.path.abspath(args[1])
- info = server.xend_domain_restore(savefile)
- PrettyPrint.prettyprint(info)
- id = sxp.child_value(info, 'id')
- if id is not None:
- server.xend_domain_unpause(id)
-
-xm.prog(ProgRestore)
-
-class ProgMigrate(Prog):
- group = 'domain'
- name = "migrate"
- info = """Migrate a domain to another machine."""
-
- def help(self, args):
- migrate.help([self.name] + args)
-
- def main(self, args):
- migrate.main(args)
-
-xm.prog(ProgMigrate)
-
-class ProgList(Prog):
- group = 'domain'
- name = "list"
- info = """List information about domains."""
-
- short_options = 'lv'
- long_options = ['long','vcpus']
-
- def help(self, args):
- if help:
- print args[0], '[options] [DOM...]'
- print """\nGet information about domains.
- Either all domains or the domains given.
-
- -l, --long Get more detailed information.
- -v, --vcpus Show VCPU to CPU mapping.
- """
- return
-
- def main(self, args):
- use_long = 0
- show_vcpus = 0
- (options, params) = getopt(args[1:],
- self.short_options,
- self.long_options)
- n = len(params)
- for (k, v) in options:
- if k in ['-l', '--long']:
- use_long = 1
- if k in ['-v', '--vcpus']:
- show_vcpus = 1
-
- if n == 0:
- doms = server.xend_domains()
- doms.sort()
- else:
- doms = params
-
- if use_long:
- self.long_list(doms)
- elif show_vcpus:
- self.show_vcpus(doms)
- else:
- self.brief_list(doms)
-
- def brief_list(self, doms):
- print 'Name Id Mem(MB) CPU VCPU(s) State Time(s)
Console'
- for dom in doms:
- info = server.xend_domain(dom)
- d = {}
- d['dom'] = int(sxp.child_value(info, 'id', '-1'))
- d['name'] = sxp.child_value(info, 'name', '??')
- d['mem'] = int(sxp.child_value(info, 'memory', '0'))
- d['cpu'] = str(sxp.child_value(info, 'cpu', '0'))
- d['vcpus'] = int(sxp.child_value(info, 'vcpus', '0'))
- d['state'] = sxp.child_value(info, 'state', '??')
- d['cpu_time'] = float(sxp.child_value(info, 'cpu_time', '0'))
- console = sxp.child(info, 'console')
- if console:
- d['port'] = sxp.child_value(console, 'console_port')
- else:
- d['port'] = ''
- if d['vcpus'] > 1:
- d['cpu'] = '-'
- if ((int(sxp.child_value(info, 'ssidref', '0'))) != 0):
- d['ssidref1'] = int(sxp.child_value(info, 'ssidref', '0')) &
0xffff
- d['ssidref2'] = (int(sxp.child_value(info, 'ssidref', '0')) >>
16) & 0xffff
- print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3s %(vcpus)5d
%(state)5s %(cpu_time)7.1f %(port)4s s:%(ssidref2)02x/p:%(ssidref1)02x"
% d)
- else:
- print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3s %(vcpus)5d
%(state)5s %(cpu_time)7.1f %(port)4s" % d)
-
- def show_vcpus(self, doms):
- print 'Name Id VCPU CPU CPUMAP'
- for dom in doms:
- info = server.xend_domain(dom)
- vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', '-1').split('|')
- cpumap = sxp.child_value(info, 'cpumap', [])
- mask = ((int(sxp.child_value(info, 'vcpus', '0')))**2) - 1
- count = 0
- for cpu in vcpu_to_cpu:
- d = {}
- d['name'] = sxp.child_value(info, 'name', '??')
- d['dom'] = int(sxp.child_value(info, 'id', '-1'))
- d['vcpu'] = int(count)
- d['cpu'] = int(cpu)
- d['cpumap'] = int(cpumap[count])&mask
- count = count + 1
- print ("%(name)-16s %(dom)3d %(vcpu)4d %(cpu)3d
0x%(cpumap)x" % d)
-
- def long_list(self, doms):
+ else:
+ raise ex
+
+
+#########################################################################
+#
+# Main xm functions
+#
+#########################################################################
+
+def xm_create(args):
+    """Hand the argument list to xen.xm.create.main().
+
+    A placeholder first element is inserted into args in place, because
+    the option parser wants a leading subcommand name and throws it away.
+    """
+    from xen.xm import create
+    args[0:0] = ["bogus"]
+    create.main(args)
+
+def xm_save(args):
+    """Ask xend to save a domain: args[0] is the domain, args[1] the file.
+
+    The file name is made absolute before it is sent to xend.
+    """
+    arg_check(args, 2, "save")
+
+    # TODO: should check if this exists
+    domain = args[0]
+    target = os.path.abspath(args[1])
+
+    from xen.xend.XendClient import server
+    server.xend_domain_save(domain, target)
+
+def xm_restore(args):
+    """Restore a domain from the save file named by args[0].
+
+    The info returned by xend is pretty-printed; if it carries an 'id'
+    the domain with that id is unpaused.
+    """
+    arg_check(args, 1, "restore")
+
+    path = os.path.abspath(args[0])
+
+    from xen.xend.XendClient import server
+    restored = server.xend_domain_restore(path)
+    PrettyPrint.prettyprint(restored)
+    domid = sxp.child_value(restored, 'id')
+    if domid is None:
+        return
+    server.xend_domain_unpause(domid)
+
+def xm_migrate(args):
+    """Hand the argument list to xen.xm.migrate.main().
+
+    A placeholder first element is inserted into args in place, because
+    the option parser wants a leading subcommand name and throws it away.
+    """
+    # TODO: arg_check
+    from xen.xm import migrate
+    args[0:0] = ["bogus"]
+    migrate.main(args)
+
+def xm_list(args):
+ use_long = 0
+ show_vcpus = 0
+ try:
+ (options, params) = getopt(args, 'lv', ['long','vcpus'])
+ except GetoptError, opterr:
+ err(opterr)
+ sys.exit(1)
+
+ n = len(params)
+ for (k, v) in options:
+ if k in ['-l', '--long']:
+ use_long = 1
+ if k in ['-v', '--vcpus']:
+ show_vcpus = 1
+
+ domsinfo = []
+ from xen.xend.XendClient import server
+ if n == 0:
+ doms = server.xend_domains()
+ doms.sort()
+ else:
+ doms = params
+ for dom in doms:
+ info = server.xend_domain(dom)
+ domsinfo.append(parse_doms_info(info))
+
+ if use_long:
for dom in doms:
info = server.xend_domain(dom)
PrettyPrint.prettyprint(info)
-
-xm.prog(ProgList)
-
-class ProgDestroy(Prog):
- group = 'domain'
- name = "destroy"
- info = """Terminate a domain immediately."""
-
- def help(self, args):
- destroy.main([args[0], '-h'])
-
- def main(self, args):
- destroy.main(args)
-
-xm.prog(ProgDestroy)
-
-class ProgShutdown(Prog):
- group = 'domain'
- name = "shutdown"
- info = """Shutdown a domain."""
-
- def help(self, args):
- shutdown.main([args[0], '-h'])
-
- def main(self, args):
- shutdown.main(args)
-
-xm.prog(ProgShutdown)
-
-class ProgSysrq(Prog):
- group = 'domain'
- name = "sysrq"
- info = """Send a sysrq to a domain."""
-
- def help(self, args):
- sysrq.main([args[0], '-h'])
-
- def main(self, args):
- sysrq.main(args)
-
-xm.prog(ProgSysrq)
-
-class ProgPause(Prog):
- group = 'domain'
- name = "pause"
- info = """Pause execution of a domain."""
-
- def help(self, args):
- print args[0], 'DOM'
- print '\nPause execution of domain DOM.'
-
- def main(self, args):
- if len(args) < 2: self.err("%s: Missing domain" % args[0])
- dom = args[1]
- server.xend_domain_pause(dom)
-
-xm.prog(ProgPause)
-
-class ProgUnpause(Prog):
- group = 'domain'
- name = "unpause"
- info = """Unpause a paused domain."""
-
- def help(self, args):
- print args[0], 'DOM'
- print '\nUnpause execution of domain DOM.'
-
- def main(self, args):
- if len(args) < 2: self.err("%s: Missing domain" % args[0])
- dom = args[1]
- server.xend_domain_unpause(dom)
-
-xm.prog(ProgUnpause)
-
-class ProgPincpu(Prog):
- group = 'domain'
- name = "pincpu"
- info = """Set which cpus a VCPU can use. """
-
- def help(self, args):
- print args[0],'DOM VCPU CPUS'
- print '\nSet which cpus VCPU in domain DOM can use.'
-
- # convert list of cpus to bitmap integer value
- def make_map(self, cpulist):
- cpus = []
- cpumap = 0
- for c in cpulist.split(','):
- if c.find('-') != -1:
- (x,y) = c.split('-')
- for i in range(int(x),int(y)+1):
- cpus.append(int(i))
- else:
- cpus.append(int(c))
- cpus.sort()
- for c in cpus:
- cpumap = cpumap | 1<<c
-
- return cpumap
-
- def main(self, args):
- if len(args) != 4: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- vcpu = int(args[2])
- cpumap = self.make_map(args[3]);
- server.xend_domain_pincpu(dom, vcpu, cpumap)
-
-xm.prog(ProgPincpu)
-
-class ProgMaxmem(Prog):
- group = 'domain'
- name = 'maxmem'
- info = """Set domain memory limit."""
-
- def help(self, args):
- print args[0], "DOM MEMORY"
- print "\nSet the memory limit for domain DOM to MEMORY megabytes."
-
- def main(self, args):
- if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- mem = int_unit(args[2], 'm')
- server.xend_domain_maxmem_set(dom, mem)
-
-xm.prog(ProgMaxmem)
-
-class ProgSetMem(Prog):
- group = 'domain'
- name = 'set-mem'
- info = """Set the domain's memory footprint using the balloon driver."""
-
- def help(self, args):
- print args[0], "DOM MEMORY_TARGET"
- print """\nRequest domain DOM to adjust its memory footprint to
-MEMORY_TARGET megabytes"""
-
- def main(self, args):
- if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- mem_target = int_unit(args[2], 'm')
- server.xend_domain_mem_target_set(dom, mem_target)
-
-xm.prog(ProgSetMem)
-
-class ProgVcpuhotplug(Prog):
- group = 'domain'
- name = 'vcpu-hotplug'
- info = """Enable or disable a VCPU in a domain."""
-
- def help(self, args):
- print args[0], "DOM VCPU [0|1]"
- print """\nRequest virtual processor VCPU to be disabled or enabled in
-domain DOM"""
-
- def main(self, args):
- if len(args) != 4: self.err("%s: Invalid arguments(s)" % args[0])
- name = args[1]
- vcpu = int(args[2])
- state = int(args[3])
- dom = server.xend_domain(name)
- id = sxp.child_value(dom, 'id')
- server.xend_domain_vcpu_hotplug(id, vcpu, state)
-
-xm.prog(ProgVcpuhotplug)
-
-class ProgDomid(Prog):
- group = 'domain'
- name = 'domid'
- info = 'Convert a domain name to a domain id.'
-
- def help(self, args):
- print args[0], "DOM"
- print '\nGet the domain id for the domain with name DOM.'
+ elif show_vcpus:
+ xm_show_vcpus(domsinfo)
+ else:
+ xm_brief_list(domsinfo)
+
+def parse_doms_info(info):
+    """Flatten a domain's sxp description into a plain dict for display.
+
+    The result has the keys dom, name, mem, cpu, vcpus, state and
+    cpu_time, plus ssidref1/ssidref2 when a non-zero ssidref is present,
+    and vcpulist: one dict per entry of vcpu_to_cpu with the keys name,
+    dom, vcpu, cpu and cpumap.
+    """
+    # Values that are needed more than once are looked up a single time.
+    domid = int(sxp.child_value(info, 'id', '-1'))
+    name = sxp.child_value(info, 'name', '??')
+    vcpus = int(sxp.child_value(info, 'vcpus', '0'))
+    ssidref = int(sxp.child_value(info, 'ssidref', '0'))
+
+    dominfo = {}
+    dominfo['dom'] = domid
+    dominfo['name'] = name
+    dominfo['mem'] = int(sxp.child_value(info, 'memory', '0'))
+    dominfo['cpu'] = str(sxp.child_value(info, 'cpu', '0'))
+    dominfo['vcpus'] = vcpus
+    # if there is more than 1 cpu, the value doesn't mean much
+    if vcpus > 1:
+        dominfo['cpu'] = '-'
+    dominfo['state'] = sxp.child_value(info, 'state', '??')
+    dominfo['cpu_time'] = float(sxp.child_value(info, 'cpu_time', '0'))
+    # security identifiers: low and high 16 bits of ssidref
+    if ssidref != 0:
+        dominfo['ssidref1'] = ssidref & 0xffff
+        dominfo['ssidref2'] = (ssidref >> 16) & 0xffff
+    # get out the vcpu information
+    dominfo['vcpulist'] = []
+    vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', '-1').split('|')
+    cpumap = sxp.child_value(info, 'cpumap', [])
+    # One mask bit per VCPU.  The earlier "vcpus**2 - 1" squared the count
+    # instead of raising 2 to it, giving 0 for one VCPU and 8 for three.
+    mask = (1 << vcpus) - 1
+    count = 0
+    for cpu in vcpu_to_cpu:
+        # cpumap can be shorter than vcpu_to_cpu (it defaults to []);
+        # report an empty map for the missing entries instead of raising
+        # IndexError.
+        if count < len(cpumap):
+            bits = int(cpumap[count]) & mask
+        else:
+            bits = 0
+        vcpuinfo = {}
+        vcpuinfo['name'] = name
+        vcpuinfo['dom'] = domid
+        vcpuinfo['vcpu'] = count
+        vcpuinfo['cpu'] = int(cpu)
+        vcpuinfo['cpumap'] = bits
+        dominfo['vcpulist'].append(vcpuinfo)
+        count = count + 1
+    return dominfo
- def main (self, args):
- if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
- name = args[1]
- dom = server.xend_domain(name)
- print sxp.child_value(dom, 'id')
-
-xm.prog(ProgDomid)
-
-class ProgDomname(Prog):
- group = 'domain'
- name = 'domname'
- info = 'Convert a domain id to a domain name.'
-
- def help(self, args):
- print args[0], "DOM"
- print '\nGet the name for the domain with id DOM.'
-
- def main (self, args):
- if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
- name = args[1]
- dom = server.xend_domain(name)
- print sxp.child_value(dom, 'name')
-
-xm.prog(ProgDomname)
-
-class ProgBvt(Prog):
- group = 'scheduler'
- name = "bvt"
- info = """Set BVT scheduler parameters."""
-
- def help(self, args):
- print args[0], "DOM MCUADV WARPBACK WARPVALUE WARPL WARPU"
- print '\nSet Borrowed Virtual Time scheduler parameters.'
-
- def main(self, args):
- if len(args) != 7: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- v = map(long, args[2:7])
- server.xend_domain_cpu_bvt_set(dom, *v)
-
-xm.prog(ProgBvt)
-
-class ProgBvtslice(Prog):
- group = 'scheduler'
- name = "bvt_ctxallow"
- info = """Set the BVT scheduler context switch allowance."""
-
- def help(self, args):
- print args[0], 'CTX_ALLOW'
- print '\nSet Borrowed Virtual Time scheduler context switch allowance.'
-
- def main(self, args):
- if len(args) < 2: self.err('%s: Missing context switch allowance'
- % args[0])
- slice = int(args[1])
- server.xend_node_cpu_bvt_slice_set(slice)
-
-xm.prog(ProgBvtslice)
-
-class ProgSedf(Prog):
- group = 'scheduler'
- name= "sedf"
- info = """Set simple EDF parameters."""
-
- def help(self, args):
- print args[0], "DOM PERIOD SLICE LATENCY EXTRATIME WEIGHT"
- print "\nSet simple EDF parameters."
-
- def main(self, args):
- if len(args) != 7: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- v = map(int, args[2:7])
- server.xend_domain_cpu_sedf_set(dom, *v)
-
-xm.prog(ProgSedf)
-
-class ProgInfo(Prog):
- group = 'host'
- name = "info"
- info = """Get information about the xen host."""
-
- def main(self, args):
- info = server.xend_node()
- for x in info[1:]:
- print "%-23s:" % x[0], x[1]
-
-xm.prog(ProgInfo)
-
-class ProgConsoles(Prog):
- group = 'console'
- name = "consoles"
- info = """Get information about domain consoles."""
-
- def main(self, args):
- l = server.xend_consoles()
- print "Dom Port Id Connection"
- for x in l:
- info = server.xend_console(x)
- d = {}
- d['dom'] = sxp.child(info, 'domain', '?')[1]
- d['port'] = sxp.child_value(info, 'console_port', '?')
- d['id'] = sxp.child_value(info, 'id', '?')
- connected = sxp.child(info, 'connected')
- if connected:
- d['conn'] = '%s:%s' % (connected[1], connected[2])
- else:
- d['conn'] = ''
- print "%(dom)3s %(port)4s %(id)3s %(conn)s" % d
-
-xm.prog(ProgConsoles)
-
-class ProgConsole(Prog):
- group = 'console'
- name = "console"
- info = """Open a console to a domain."""
-
- def help(self, args):
- print args[0], "DOM"
- print "\nOpen a console to domain DOM."
-
- def main(self, args):
- if len(args) < 2: self.err("%s: Missing domain" % args[0])
- dom = args[1]
- info = server.xend_domain(dom)
- console = sxp.child(info, "console")
- if not console:
- self.err("No console information")
- port = sxp.child_value(console, "console_port")
- from xen.util import console_client
- path = "/var/lib/xend/console-%s" % port
- console_client.connect("localhost", int(port), path=path)
-
-xm.prog(ProgConsole)
-
-class ProgCall(Prog):
- name = "call"
- info = "Call xend api functions."
-
- def help (self, args):
- print args[0], "function args..."
- print """
- Call a xend HTTP API function. The leading 'xend_' on the function
-can be omitted. See xen.xend.XendClient for the API functions.
-"""
-
- def main(self, args):
- xend_client_main(args)
-
-xm.prog(ProgCall)
-
-class ProgDmesg(Prog):
- group = 'host'
- name = "dmesg"
- info = """Read or clear Xen's message buffer."""
-
+def xm_brief_list(domsinfo):
+    """Print a header line and one summary line per domain.
+
+    domsinfo is a list of dicts as built by parse_doms_info(); entries
+    that carry ssidref1/ssidref2 get the security ids appended.
+    """
+    # The row formats were split in the middle of the string literal by
+    # line wrapping; they are rejoined here on a single space.
+    # NOTE(review): runs of spaces in the header and row formats look
+    # collapsed in this copy of the patch - confirm the column spacing
+    # against the upstream file.
+    row = "%(name)-16s %(dom)3d %(mem)7d %(cpu)3s %(vcpus)5d %(state)5s %(cpu_time)7.1f"
+    ssid = " s:%(ssidref2)02x/p:%(ssidref1)02x"
+    print 'Name Id Mem(MB) CPU VCPU(s) State Time(s)'
+    for dominfo in domsinfo:
+        if dominfo.has_key("ssidref1"):
+            print (row + ssid) % dominfo
+        else:
+            print row % dominfo
+
+def xm_show_vcpus(domsinfo):
+    """Print a header line and one line per entry of each domain's vcpulist."""
+    row = "%(name)-16s %(dom)3d %(vcpu)4d %(cpu)3d 0x%(cpumap)x"
+    print 'Name Id VCPU CPU CPUMAP'
+    for entry in domsinfo:
+        vcpus = entry['vcpulist']
+        for vcpu in vcpus:
+            print (row % vcpu)
+
+def xm_vcpu_list(args):
+    """Run xm_list() with "-v" put in front of the given arguments.
+
+    The option is inserted into args in place.
+    """
+    args[0:0] = ["-v"]
+    xm_list(args)
+
+def xm_destroy(args):
+    """Check for at least one argument, then call xen.xm.destroy.main().
+
+    A placeholder first element is inserted into args in place, because
+    the option parser wants a leading subcommand name and throws it away.
+    """
+    arg_check(args, 1, "destroy")
+
+    from xen.xm import destroy
+    args[0:0] = ["bogus"]
+    destroy.main(args)
+
+def xm_reboot(args):
+    """Call xen.xm.shutdown.main() with the -R option added to args."""
+    arg_check(args, 1, "reboot")
+    from xen.xm import shutdown
+    # The option parser wants a leading subcommand name just to throw it
+    # away, hence "bogus".  -R has to precede the positional arguments:
+    # it used to be inserted after the domain, where getopt-style
+    # scanning (which stops at the first non-option word) no longer sees
+    # it as an option.
+    # NOTE(review): assumes shutdown.main() parses with plain getopt, as
+    # xen.xm.opts imports it - confirm.
+    # A new list is passed so the caller's args still begins with the
+    # domain, which main() uses when it reports a xend error.
+    shutdown.main(["bogus", "-R"] + args)
+
+def xm_shutdown(args):
+    """Check for at least one argument, then call xen.xm.shutdown.main().
+
+    A placeholder first element is inserted into args in place, because
+    the option parser wants a leading subcommand name and throws it away.
+    """
+    arg_check(args, 1, "shutdown")
+
+    args[0:0] = ["bogus"]
+    from xen.xm import shutdown
+    shutdown.main(args)
+
+def xm_sysrq(args):
+    """Hand the argument list to xen.xm.sysrq.main().
+
+    A placeholder first element is inserted into args in place, because
+    the option parser wants a leading subcommand name and throws it away.
+    """
+    from xen.xm import sysrq
+    args[0:0] = ["bogus"]
+    sysrq.main(args)
+
+def xm_pause(args):
+    """Ask xend to pause the domain given as args[0]."""
+    arg_check(args, 1, "pause")
+    target = args[0]
+
+    from xen.xend.XendClient import server
+    server.xend_domain_pause(target)
+
+def xm_unpause(args):
+    """Ask xend to unpause the domain given as args[0]."""
+    arg_check(args, 1, "unpause")
+    target = args[0]
+
+    from xen.xend.XendClient import server
+    server.xend_domain_unpause(target)
+
+#############################################################
+
+def cpu_make_map(cpulist):
+    """Turn a comma-separated CPU list into an integer bitmap.
+
+    Each item is either a single number or an inclusive range "A-B";
+    bit N of the result is set for every CPU number N that is listed.
+    """
+    bitmap = 0
+    for item in cpulist.split(','):
+        if '-' in item:
+            (first, last) = item.split('-')
+            ids = range(int(first), int(last) + 1)
+        else:
+            ids = [int(item)]
+        for cpu in ids:
+            bitmap = bitmap | 1 << cpu
+
+    return bitmap
+
+def xm_cpus_set(args):
+    """Pin a VCPU to a set of CPUs: args are [DOM, VCPU, CPULIST].
+
+    CPULIST is converted to a bitmap with cpu_make_map().
+    """
+    arg_check(args, 3, "cpus-set")
+
+    (domain, vcpu_arg, cpus_arg) = (args[0], args[1], args[2])
+    vcpu = int(vcpu_arg)
+    bitmap = cpu_make_map(cpus_arg)
+
+    from xen.xend.XendClient import server
+    server.xend_domain_pincpu(domain, vcpu, bitmap)
+
+def xm_mem_max(args):
+    """Set a domain's memory limit: args are [DOM, MEMORY].
+
+    MEMORY is parsed by int_unit() with 'm' as its second argument.
+    """
+    arg_check(args, 2, "mem-max")
+
+    domain = args[0]
+    limit = int_unit(args[1], 'm')
+
+    from xen.xend.XendClient import server
+    server.xend_domain_maxmem_set(domain, limit)
+
+def xm_mem_set(args):
+    """Set a domain's memory target: args are [DOM, MEMORY].
+
+    MEMORY is parsed by int_unit() with 'm' as its second argument.
+    """
+    arg_check(args, 2, "mem-set")
+
+    domain = args[0]
+    target = int_unit(args[1], 'm')
+
+    from xen.xend.XendClient import server
+    server.xend_domain_mem_target_set(domain, target)
+
+# TODO: why does this lookup by name? and what if that fails!?
+def xm_vcpu_enable(args):
+    """Hotplug a VCPU with state 1: args are [DOM, VCPU].
+
+    The domain is looked up through xend first and the 'id' from its
+    info is what gets passed to the hotplug call.
+    """
+    arg_check(args, 2, "vcpu-enable")
+
+    domname = args[0]
+    vcpu = int(args[1])
+
+    from xen.xend.XendClient import server
+    dominfo = server.xend_domain(domname)
+    domid = sxp.child_value(dominfo, 'id')
+    server.xend_domain_vcpu_hotplug(domid, vcpu, 1)
+
+def xm_vcpu_disable(args):
+    """Hotplug a VCPU with state 0: args are [DOM, VCPU].
+
+    The domain is looked up through xend first and the 'id' from its
+    info is what gets passed to the hotplug call.
+    """
+    arg_check(args, 2, "vcpu-disable")
+
+    domname = args[0]
+    vcpu = int(args[1])
+
+    from xen.xend.XendClient import server
+    dominfo = server.xend_domain(domname)
+    domid = sxp.child_value(dominfo, 'id')
+    server.xend_domain_vcpu_hotplug(domid, vcpu, 0)
+
+def xm_domid(args):
+    """Print the 'id' of the domain that xend returns for args[0]."""
+    # Same argument check as the sibling commands; without it an empty
+    # argument list raised IndexError on args[0].
+    arg_check(args, 1, "domid")
+
+    name = args[0]
+
+    from xen.xend.XendClient import server
+    dom = server.xend_domain(name)
+    print sxp.child_value(dom, 'id')
+
+def xm_domname(args):
+    """Print the 'name' of the domain that xend returns for args[0]."""
+    # Same argument check as the sibling commands; without it an empty
+    # argument list raised IndexError on args[0].
+    arg_check(args, 1, "domname")
+
+    name = args[0]
+
+    from xen.xend.XendClient import server
+    dom = server.xend_domain(name)
+    print sxp.child_value(dom, 'name')
+
+def xm_bvt(args):
+    """Set BVT scheduler parameters: args are DOM followed by five values.
+
+    The five values are converted with long() and passed on positionally.
+    """
+    arg_check(args, 6, "bvt")
+    domain = args[0]
+    params = [long(word) for word in args[1:6]]
+    from xen.xend.XendClient import server
+    server.xend_domain_cpu_bvt_set(domain, *params)
+
+def xm_bvt_ctxallow(args):
+    """Pass int(args[0]) to xend's node-level BVT slice setter."""
+    arg_check(args, 1, "bvt_ctxallow")
+
+    allowance = int(args[0])
+    from xen.xend.XendClient import server
+    server.xend_node_cpu_bvt_slice_set(allowance)
+
+def xm_sedf(args):
+    """Set SEDF scheduler parameters: args are DOM followed by five values.
+
+    The five values are converted with int() and passed on positionally.
+    """
+    arg_check(args, 6, "sedf")
+
+    domain = args[0]
+    params = [int(word) for word in args[1:6]]
+    from xen.xend.XendClient import server
+    server.xend_domain_cpu_sedf_set(domain, *params)
+
+def xm_info(args):
+    """Print the node information from xend as "key: value" lines.
+
+    The first element of the returned list is skipped; args is unused.
+    """
+    from xen.xend.XendClient import server
+    node = server.xend_node()
+
+    for entry in node[1:]:
+        key = entry[0]
+        value = entry[1]
+        print "%-23s:" % key, value
+
+# TODO: remove as soon as console server shows up
+def xm_console(args):
+    """Replace this process with xenconsole for the domain in args[0].
+
+    The domain is looked up through xend and its numeric 'id' is passed
+    to /usr/libexec/xen/xenconsole as the only argument.
+    """
+    arg_check(args, 1, "console")
+
+    dom = args[0]
+    from xen.xend.XendClient import server
+    info = server.xend_domain(dom)
+    domid = int(sxp.child_value(info, 'id', '-1'))
+    if domid < 0:
+        # -1 is the default used when the info has no 'id'; do not start
+        # xenconsole on a domain id that does not exist.
+        err("No domain id found for %s" % dom)
+        sys.exit(1)
+    console = '/usr/libexec/xen/xenconsole'
+    # execvp() replaces the current process, so nothing after it would
+    # run; the unreachable console lookup that used to follow is removed.
+    os.execvp(console, [console, "%d" % domid])
+
+def xm_top(args):
+    """Replace this process with /usr/sbin/xentop; args is not passed on."""
+    xentop = '/usr/sbin/xentop'
+    os.execv(xentop, [xentop])
+
+def xm_dmesg(args):
+
gopts = Opts(use="""[-c|--clear]
Read Xen's message buffer (boot output, warning and error messages) or clear
@@ -775,161 +473,220 @@
gopts.opt('clear', short='c',
fn=set_true, default=0,
use="Clear the contents of the Xen message buffer.")
-
- short_options = ['-c']
- long_options = ['--clear']
-
- def help(self, args):
- self.gopts.argv = args
- self.gopts.usage()
-
- def main(self, args):
- self.gopts.parse(args)
- if not (1 <= len(args) <=2):
- self.gopts.err('Invalid arguments: ' + str(args))
-
- if not self.gopts.vals.clear:
- print server.xend_node_get_dmesg()
- else:
- server.xend_node_clear_dmesg()
-
-xm.prog(ProgDmesg)
-
-class ProgLog(Prog):
- group = 'host'
- name = "log"
- info = """Print the xend log."""
-
- def main(self, args):
- print server.xend_node_log()
-
-xm.prog(ProgLog)
-
-class ProgVifCreditLimit(Prog):
- group = 'vif'
- name= "vif-limit"
- info = """Limit the transmission rate of a virtual network interface."""
-
- def help(self, args):
- print args[0], "DOMAIN VIF CREDIT_IN_BYTES PERIOD_IN_USECS"
- print "\nSet the credit limit of a virtual network interface."
-
- def main(self, args):
- if len(args) != 5: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- v = map(int, args[2:5])
- server.xend_domain_vif_limit(dom, *v)
-
-xm.prog(ProgVifCreditLimit)
-
-class ProgVifList(Prog):
- group = 'vif'
- name = 'vif-list'
- info = """List virtual network interfaces for a domain."""
-
- def help(self, args):
- print args[0], "DOM"
- print "\nList virtual network interfaces for domain DOM"
-
- def main(self, args):
- if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- for x in server.xend_domain_devices(dom, 'vif'):
- sxp.show(x)
+ # Work around for gopts
+ args.insert(0,"bogus")
+ gopts.parse(args)
+ if not (1 <= len(args) <= 2):
+ err('Invalid arguments: ' + str(args))
+
+ from xen.xend.XendClient import server
+ if not gopts.vals.clear:
+ print server.xend_node_get_dmesg()
+ else:
+ server.xend_node_clear_dmesg()
+
+def xm_log(args):
+    """Print the log text returned by xend; args is unused."""
+    from xen.xend.XendClient import server
+    text = server.xend_node_log()
+    print text
+
+def xm_network_limit(args):
+    """Set a vif limit: args are DOM followed by three integer values.
+
+    The three values are converted with int() and passed on positionally.
+    """
+    arg_check(args, 4, "network-limit")
+    domain = args[0]
+    params = [int(word) for word in args[1:4]]
+    from xen.xend.XendClient import server
+    server.xend_domain_vif_limit(domain, *params)
+
+def xm_network_list(args):
+ """Show every 'vif' device that xend reports for the domain args[0]."""
+ arg_check(args,1,"network-list")
+ dom = args[0]
+ from xen.xend.XendClient import server
+ for x in server.xend_domain_devices(dom, 'vif'):
+ sxp.show(x)
+ # NOTE(review): indentation is lost in this copy of the patch, so it is
+ # unclear whether this print runs once per device or once at the end.
+ print
+
+def xm_block_list(args):
+ """Show every 'vbd' device that xend reports for the domain args[0]."""
+ arg_check(args,1,"block-list")
+ dom = args[0]
+ from xen.xend.XendClient import server
+ for x in server.xend_domain_devices(dom, 'vbd'):
+ sxp.show(x)
+ # NOTE(review): indentation is lost in this copy of the patch, so it is
+ # unclear whether this print runs once per device or once at the end.
+ print
+
+def xm_block_create(args):
+    """Create a block device: args are [DOM, UNAME, DEV, MODE] plus an
+    optional fifth element that is sent as the 'backend' entry.
+    """
+    n = len(args)
+    if n < 4 or n > 5:
+        # args no longer starts with the command name (main() strips it),
+        # so name the command literally; formatting args[0] printed the
+        # domain instead and raised IndexError on an empty list.
+        err("block-create: Invalid argument(s)")
+        usage("block-create")
+
+    dom = args[0]
+    vbd = ['vbd',
+           ['uname', args[1]],
+           ['dev', args[2]],
+           ['mode', args[3]]]
+    if n == 5:
+        vbd.append(['backend', args[4]])
+
+    from xen.xend.XendClient import server
+    server.xend_domain_device_create(dom, vbd)
+
+def xm_block_refresh(args):
+    """Ask xend to refresh a 'vbd' device: args are [DOM, DEV]."""
+    arg_check(args, 2, "block-refresh")
+
+    (domain, device) = (args[0], args[1])
+
+    from xen.xend.XendClient import server
+    server.xend_domain_device_refresh(domain, 'vbd', device)
+
+def xm_block_destroy(args):
+    """Ask xend to destroy a 'vbd' device: args are [DOM, DEV]."""
+    arg_check(args, 2, "block-destroy")
+
+    (domain, device) = (args[0], args[1])
+
+    from xen.xend.XendClient import server
+    server.xend_domain_device_destroy(domain, 'vbd', device)
+
+# Dispatch table: sub-command name -> handler function.  main() looks the
+# name up through xm_lookup_cmd() and calls the handler with the remaining
+# command-line arguments.
+commands = {
+ # console commands
+ "console": xm_console,
+ # xenstat commands
+ "top": xm_top,
+ # domain commands
+ "domid": xm_domid,
+ "domname": xm_domname,
+ "create": xm_create,
+ "destroy": xm_destroy,
+ "restore": xm_restore,
+ "save": xm_save,
+ "shutdown": xm_shutdown,
+ "reboot": xm_reboot,
+ "list": xm_list,
+ # memory commands
+ "mem-max": xm_mem_max,
+ "mem-set": xm_mem_set,
+ # cpu commands
+ "cpus-set": xm_cpus_set,
+# "cpus-list": xm_cpus_list,
+ "vcpu-enable": xm_vcpu_enable,
+ "vcpu-disable": xm_vcpu_disable,
+ "vcpu-list": xm_vcpu_list,
+ # migration
+ "migrate": xm_migrate,
+ # special
+ "sysrq": xm_sysrq,
+ "pause": xm_pause,
+ "unpause": xm_unpause,
+ # host commands
+ "dmesg": xm_dmesg,
+ "info": xm_info,
+ "log": xm_log,
+ # scheduler
+ "bvt": xm_bvt,
+ "bvt_ctxallow": xm_bvt_ctxallow,
+ "sedf": xm_sedf,
+ # block
+ "block-create": xm_block_create,
+ "block-destroy": xm_block_destroy,
+ "block-list": xm_block_list,
+ "block-refresh": xm_block_refresh,
+ # network
+ "network-limit": xm_network_limit,
+ "network-list": xm_network_list
+ }
+
+# Alternative sub-command names mapped to the name used in the commands
+# table.  xm_lookup_cmd() prints a deprecation notice when one is used.
+aliases = {
+ "balloon": "mem-set",
+ "vif-list": "network-list",
+ "vif-limit": "network-limit",
+ "vbd-create": "block-create",
+ "vbd-destroy": "block-destroy",
+ "vbd-list": "block-list",
+ "vbd-refresh": "block-refresh",
+ }
+
+# Help texts selectable by keyword; usage() and main() look keywords up
+# here.  Note that this module-level name shadows the builtin help().
+help = {
+ "--long": longhelp
+ }
+
+def xm_lookup_cmd(cmd):
+    """Return the handler function for the sub-command name cmd.
+
+    Lookup order: exact name in commands, then a name in aliases (after
+    a deprecation notice), then - for names longer than one character -
+    a prefix that matches exactly one entry of commands.  When nothing
+    matches, an error is reported and usage() is called.
+    """
+    if commands.has_key(cmd):
+        return commands[cmd]
+    if aliases.has_key(cmd):
+        deprecated(cmd, aliases[cmd])
+        return commands[aliases[cmd]]
+    if len(cmd) > 1:
+        # Only an unambiguous prefix is accepted.
+        matches = [func for (name, func) in commands.iteritems()
+                   if name.startswith(cmd)]
+        if len(matches) == 1:
+            return matches[0]
+    err('Sub Command %s not found!' % cmd)
+    usage()
+
+def deprecated(old,new):
+    """Report through err() that the name old is deprecated and that new
+    replaces it.
+    """
+    err('Option %s is deprecated, and will be removed in future!!!' % old)
+    err('Option %s is the new replacement, see "xm help %s" for more info'
+        % (new, new))
+
+def usage(cmd=None):
+ """Print help text chosen by cmd and exit with status 1.
+
+ "full" selects fullhelp, a key of the help table selects that entry,
+ and anything else selects shorthelp.
+ """
+ if cmd == "full":
+ print fullhelp
+ elif help.has_key(cmd):
+ print help[cmd]
+ else:
+ print shorthelp
+ # NOTE(review): indentation is lost in this copy of the patch; the exit
+ # is presumably unconditional rather than part of the else branch - confirm.
+ sys.exit(1)
+
+def main(argv=sys.argv):
+ if len(argv) < 2:
+ usage()
+
+ if re.compile('-*help').match(argv[1]):
+ if len(argv) > 2 and help.has_key(argv[2]):
+ usage(argv[2])
+ else:
+ usage()
+ sys.exit(0)
+
+ cmd = xm_lookup_cmd(argv[1])
+
+ # strip off prog name and subcmd
+ args = argv[2:]
+ if cmd:
+ try:
+ from xen.xend.XendClient import XendError
+ rc = cmd(args)
+ if rc:
+ usage()
+ except socket.error, ex:
+ print >>sys.stderr, ex
+ err("Error connecting to xend, is xend running?")
+ sys.exit(1)
+ except IOError:
+ err("Most commands need root access. Please try again as root")
+ sys.exit(1)
+ except XendError, ex:
+ if args[0] == "bogus":
+ args.remove("bogus")
+ if len(args) > 0:
+ handle_xend_error(argv[1], args[0], ex)
+ else:
+ print "Unexpected error:", sys.exc_info()[0]
+ print
+ print "Please report to xen-devel@xxxxxxxxxxxxxxxxxxx"
+ raise
+ except SystemExit:
+ sys.exit(1)
+ except:
+ print "Unexpected error:", sys.exc_info()[0]
print
-
-xm.prog(ProgVifList)
-
-class ProgVbdList(Prog):
- group = 'vbd'
- name = 'vbd-list'
- info = """List virtual block devices for a domain."""
-
- def help(self, args):
- print args[0], "DOM"
- print "\nList virtual block devices for domain DOM"
-
- def main(self, args):
- if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- for x in server.xend_domain_devices(dom, 'vbd'):
- sxp.show(x)
- print
-
-xm.prog(ProgVbdList)
-
-class ProgVbdCreate(Prog):
- group = 'vbd'
- name = 'vbd-create'
- info = """Create a new virtual block device for a domain"""
-
- def help(self, args):
- print args[0], "DOM UNAME DEV MODE [BACKEND]"
- print """
-Create a virtual block device for a domain.
-
- UNAME - device to export, e.g. phy:hda2
- DEV - device name in the domain, e.g. sda1
- MODE - access mode: r for read, w for read-write
- BACKEND - backend driver domain
-"""
-
- def main(self, args):
- n = len(args)
- if n < 5 or n > 6: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- vbd = ['vbd',
- ['uname', args[2]],
- ['dev', args[3]],
- ['mode', args[4]]]
- if n == 6:
- vbd.append(['backend', args[5]])
- server.xend_domain_device_create(dom, vbd)
-
-xm.prog(ProgVbdCreate)
-
-class ProgVbdRefresh(Prog):
- group = 'vbd'
- name = 'vbd-refresh'
- info = """Refresh a virtual block device for a domain"""
-
- def help(self, args):
- print args[0], "DOM DEV"
- print """
-Refresh a virtual block device for a domain.
-
- DEV - idx field in the device information
-"""
-
- def main(self, args):
- if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- dev = args[2]
- server.xend_domain_device_refresh(dom, 'vbd', dev)
-
-xm.prog(ProgVbdRefresh)
-
-
-class ProgVbdDestroy(Prog):
- group = 'vbd'
- name = 'vbd-destroy'
- info = """Destroy a domain's virtual block device"""
-
- def help(self, args):
- print args[0], "DOM DEV"
- print """
-Destroy vbd DEV attached to domain DOM. Detaches the device
-from the domain, but does not destroy the device contents.
-The device indentifier DEV is the idx field in the device
-information. This is visible in 'xm vbd-list'."""
-
- def main(self, args):
- if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- dev = args[2]
- server.xend_domain_device_destroy(dom, 'vbd', dev)
-
-xm.prog(ProgVbdDestroy)
-
-def main(args):
- xm.main(args)
+ print "Please report to xen-devel@xxxxxxxxxxxxxxxxxxx"
+ raise
+
+ else:
+ usage()
+
+if __name__ == "__main__":
+ main()
+
+
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/migrate.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""Domain migration.
"""
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/opts.py
--- a/tools/python/xen/xm/opts.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/opts.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
"""Object-oriented command-line option support.
"""
from getopt import getopt, GetoptError
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/shutdown.py
--- a/tools/python/xen/xm/shutdown.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/shutdown.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
"""Domain shutdown.
"""
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/sysrq.py
--- a/tools/python/xen/xm/sysrq.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/sysrq.py Thu Aug 25 22:53:20 2005
@@ -21,9 +21,6 @@
fn=set_true, default=0,
use="Print this help.")
-def sysrq(dom, req):
- server.xend_domain_shutdown(dom, 'sysrq', req)
-
def main(argv):
opts = gopts
args = opts.parse(argv)
@@ -36,4 +33,4 @@
if len(args) < 2: opts.err('Missing sysrq character')
dom = args[0]
req = ord(args[1][0])
- sysrq(dom, req)
+ server.xend_domain_sysrq(dom, req)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/Makefile
--- a/tools/security/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/security/Makefile Thu Aug 25 22:53:20 2005
@@ -2,27 +2,71 @@
include $(XEN_ROOT)/tools/Rules.mk
SRCS = secpol_tool.c
-CFLAGS += -static
CFLAGS += -Wall
CFLAGS += -Werror
CFLAGS += -O3
CFLAGS += -fno-strict-aliasing
-CFLAGS += -I.
+CFLAGS += -I. -I/usr/include/libxml2
+CFLAGS_XML2BIN += $(shell xml2-config --cflags --libs )
+#if above does not work, try -L/usr/lib -lxml2 -lz -lpthread -lm
+XML2VERSION = $(shell xml2-config --version )
+VALIDATE_SCHEMA=$(shell echo $(XML2VERSION) | awk -F. '{ if ($$1*1000000 + $$2*1000 + $$3 >= 2006020) print "-DVALIDATE_SCHEMA" }') # numeric (not string) test for libxml2 >= 2.6.20; works with plain /bin/sh
+ifeq ($(ACM_USE_SECURITY_POLICY),ACM_NULL_POLICY)
+POLICY=null
+endif
+ifeq ($(ACM_USE_SECURITY_POLICY),ACM_CHINESE_WALL_POLICY)
+POLICY=chwall
+endif
+ifeq ($(ACM_USE_SECURITY_POLICY),ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)
+POLICY=ste
+endif
+ifeq ($(ACM_USE_SECURITY_POLICY),ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY)
+POLICY=chwall_ste
+endif
+POLICYFILE=./policies/$(POLICY)/$(POLICY).bin
+
+ifneq ($(ACM_USE_SECURITY_POLICY), ACM_NULL_POLICY)
all: build
+
+install:all
+
+default:all
+else
+all:
+
+install:
+
+default:
+endif
+
build: mk-symlinks
$(MAKE) secpol_tool
+ $(MAKE) secpol_xml2bin
+ chmod 700 ./setlabel.sh
+ chmod 700 ./updategrub.sh
-default: all
-
-install: all
-
-secpol_tool : secpol_tool.c
+secpol_tool : secpol_tool.c secpol_compat.h
$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $<
+secpol_xml2bin : secpol_xml2bin.c secpol_xml2bin.h secpol_compat.h # xml2 libs follow the source so the linker resolves its symbols
+	$(CC) $(CPPFLAGS) $(CFLAGS) $(VALIDATE_SCHEMA) -o $@ $< $(CFLAGS_XML2BIN)
+
clean:
- rm -rf secpol_tool xen
+ rm -rf secpol_tool secpol_xml2bin xen
+policy_clean:
+ rm -rf policies/*/*.bin policies/*/*.map
+
+mrproper: clean policy_clean
+
+
+$(POLICYFILE) : build
+ @./secpol_xml2bin $(POLICY) > /dev/null
+
+boot_install: $(POLICYFILE)
+ @cp $(POLICYFILE) /boot
+ @./updategrub.sh $(POLICY) $(PWD)/$(XEN_ROOT)
LINUX_ROOT := $(XEN_ROOT)/linux-2.6-xen-sparse
mk-symlinks:
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/secpol_tool.c
--- a/tools/security/secpol_tool.c Wed Aug 24 02:43:18 2005
+++ b/tools/security/secpol_tool.c Thu Aug 25 22:53:20 2005
@@ -31,18 +31,8 @@
#include <stdlib.h>
#include <sys/ioctl.h>
#include <string.h>
-#include <stdint.h>
#include <netinet/in.h>
-
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
-typedef int8_t s8;
-typedef int16_t s16;
-typedef int32_t s32;
-typedef int64_t s64;
-
+#include "secpol_compat.h"
#include <xen/acm.h>
#include <xen/acm_ops.h>
#include <xen/linux/privcmd.h>
@@ -270,171 +260,6 @@
}
}
-/*************************** set policy ****************************/
-
-int acm_domain_set_chwallpolicy(void *bufstart, int buflen)
-{
-#define CWALL_MAX_SSIDREFS 6
-#define CWALL_MAX_TYPES 10
-#define CWALL_MAX_CONFLICTSETS 2
-
- struct acm_chwall_policy_buffer *chwall_bin_pol =
- (struct acm_chwall_policy_buffer *) bufstart;
- domaintype_t *ssidrefs, *conflicts;
- int ret = 0;
- int j;
-
- chwall_bin_pol->chwall_max_types = htonl(CWALL_MAX_TYPES);
- chwall_bin_pol->chwall_max_ssidrefs = htonl(CWALL_MAX_SSIDREFS);
- chwall_bin_pol->policy_code = htonl(ACM_CHINESE_WALL_POLICY);
- chwall_bin_pol->policy_version = htonl(ACM_CHWALL_VERSION);
- chwall_bin_pol->chwall_ssid_offset =
- htonl(sizeof(struct acm_chwall_policy_buffer));
- chwall_bin_pol->chwall_max_conflictsets =
- htonl(CWALL_MAX_CONFLICTSETS);
- chwall_bin_pol->chwall_conflict_sets_offset =
- htonl(ntohl(chwall_bin_pol->chwall_ssid_offset) +
- sizeof(domaintype_t) * CWALL_MAX_SSIDREFS * CWALL_MAX_TYPES);
- chwall_bin_pol->chwall_running_types_offset = 0; /* not set */
- chwall_bin_pol->chwall_conflict_aggregate_offset = 0; /* not set */
- ret += sizeof(struct acm_chwall_policy_buffer);
- /* now push example ssids into the buffer (max_ssidrefs x max_types
entries) */
- /* check buffer size */
- if ((buflen - ret) <
- (CWALL_MAX_TYPES * CWALL_MAX_SSIDREFS * sizeof(domaintype_t)))
- return -1; /* not enough space */
-
- ssidrefs = (domaintype_t *) (bufstart +
- ntohl(chwall_bin_pol->chwall_ssid_offset));
- memset(ssidrefs, 0,
- CWALL_MAX_TYPES * CWALL_MAX_SSIDREFS * sizeof(domaintype_t));
-
- /* now set type j-1 for ssidref i+1 */
- for (j = 0; j <= CWALL_MAX_SSIDREFS; j++)
- if ((0 < j) && (j <= CWALL_MAX_TYPES))
- ssidrefs[j * CWALL_MAX_TYPES + j - 1] = htons(1);
-
- ret += CWALL_MAX_TYPES * CWALL_MAX_SSIDREFS * sizeof(domaintype_t);
- if ((buflen - ret) <
- (CWALL_MAX_CONFLICTSETS * CWALL_MAX_TYPES * sizeof(domaintype_t)))
- return -1; /* not enough space */
-
- /* now the chinese wall policy conflict sets */
- conflicts = (domaintype_t *) (bufstart +
- ntohl(chwall_bin_pol->
- chwall_conflict_sets_offset));
- memset((void *) conflicts, 0,
- CWALL_MAX_CONFLICTSETS * CWALL_MAX_TYPES *
- sizeof(domaintype_t));
- /* just 1 conflict set [0]={2,3}, [1]={1,5,6} */
- if (CWALL_MAX_TYPES > 3)
- {
- conflicts[2] = htons(1);
- conflicts[3] = htons(1); /* {2,3} */
- conflicts[CWALL_MAX_TYPES + 1] = htons(1);
- conflicts[CWALL_MAX_TYPES + 5] = htons(1);
- conflicts[CWALL_MAX_TYPES + 6] = htons(1); /* {0,5,6} */
- }
- ret += sizeof(domaintype_t) * CWALL_MAX_CONFLICTSETS * CWALL_MAX_TYPES;
- return ret;
-}
-
-int acm_domain_set_stepolicy(void *bufstart, int buflen)
-{
-#define STE_MAX_SSIDREFS 6
-#define STE_MAX_TYPES 5
-
- struct acm_ste_policy_buffer *ste_bin_pol =
- (struct acm_ste_policy_buffer *) bufstart;
- domaintype_t *ssidrefs;
- int j, ret = 0;
-
- ste_bin_pol->ste_max_types = htonl(STE_MAX_TYPES);
- ste_bin_pol->ste_max_ssidrefs = htonl(STE_MAX_SSIDREFS);
- ste_bin_pol->policy_code = htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
- ste_bin_pol->policy_version = htonl(ACM_STE_VERSION);
- ste_bin_pol->ste_ssid_offset =
- htonl(sizeof(struct acm_ste_policy_buffer));
- ret += sizeof(struct acm_ste_policy_buffer);
- /* check buffer size */
- if ((buflen - ret) <
- (STE_MAX_TYPES * STE_MAX_SSIDREFS * sizeof(domaintype_t)))
- return -1; /* not enough space */
-
- ssidrefs =
- (domaintype_t *) (bufstart + ntohl(ste_bin_pol->ste_ssid_offset));
- memset(ssidrefs, 0,
- STE_MAX_TYPES * STE_MAX_SSIDREFS * sizeof(domaintype_t));
- /* all types 1 for ssidref 1 */
- for (j = 0; j < STE_MAX_TYPES; j++)
- ssidrefs[1 * STE_MAX_TYPES + j] = htons(1);
- /* now set type j-1 for ssidref j */
- for (j = 0; j < STE_MAX_SSIDREFS; j++)
- if ((0 < j) && (j <= STE_MAX_TYPES))
- ssidrefs[j * STE_MAX_TYPES + j - 1] = htons(1);
- ret += STE_MAX_TYPES * STE_MAX_SSIDREFS * sizeof(domaintype_t);
- return ret;
-}
-
-#define MAX_PUSH_BUFFER 16384
-u8 push_buffer[MAX_PUSH_BUFFER];
-
-int acm_domain_setpolicy(int xc_handle)
-{
- int ret;
- struct acm_policy_buffer *bin_pol;
- acm_op_t op;
-
- /* future: read policy from file and set it */
- bin_pol = (struct acm_policy_buffer *) push_buffer;
- bin_pol->policy_version = htonl(ACM_POLICY_VERSION);
- bin_pol->magic = htonl(ACM_MAGIC);
- bin_pol->primary_policy_code = htonl(ACM_CHINESE_WALL_POLICY);
- bin_pol->secondary_policy_code =
- htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
-
- bin_pol->len = htonl(sizeof(struct acm_policy_buffer));
- bin_pol->primary_buffer_offset = htonl(ntohl(bin_pol->len));
- ret =
- acm_domain_set_chwallpolicy(push_buffer +
- ntohl(bin_pol->primary_buffer_offset),
- MAX_PUSH_BUFFER -
- ntohl(bin_pol->primary_buffer_offset));
- if (ret < 0)
- {
- printf("ERROR creating chwallpolicy buffer.\n");
- return -1;
- }
- bin_pol->len = htonl(ntohl(bin_pol->len) + ret);
- bin_pol->secondary_buffer_offset = htonl(ntohl(bin_pol->len));
- ret = acm_domain_set_stepolicy(push_buffer +
- ntohl(bin_pol->secondary_buffer_offset),
- MAX_PUSH_BUFFER -
- ntohl(bin_pol->secondary_buffer_offset));
- if (ret < 0)
- {
- printf("ERROR creating chwallpolicy buffer.\n");
- return -1;
- }
- bin_pol->len = htonl(ntohl(bin_pol->len) + ret);
-
- /* dump it and then push it down into xen/acm */
- acm_dump_policy_buffer(push_buffer, ntohl(bin_pol->len));
-
- op.cmd = ACM_SETPOLICY;
- op.interface_version = ACM_INTERFACE_VERSION;
- op.u.setpolicy.pushcache = (void *) push_buffer;
- op.u.setpolicy.pushcache_size = ntohl(bin_pol->len);
- ret = do_acm_op(xc_handle, &op);
-
- if (ret)
- printf("ERROR setting policy. Use 'xm dmesg' to see details.\n");
- else
- printf("Successfully changed policy.\n");
-
- return ret;
-}
-
/******************************* get policy ******************************/
#define PULL_CACHE_SIZE 8192
@@ -602,7 +427,6 @@
void usage(char *progname)
{
printf("Use: %s \n"
- "\t setpolicy\n"
"\t getpolicy\n"
"\t dumpstats\n"
"\t loadpolicy <binary policy file>\n", progname);
@@ -612,7 +436,7 @@
int main(int argc, char **argv)
{
- int acm_cmd_fd, ret;
+ int acm_cmd_fd, ret = 0;
if (argc < 2)
usage(argv[0]);
@@ -623,12 +447,7 @@
exit(-1);
}
- if (!strcmp(argv[1], "setpolicy"))
- {
- if (argc != 2)
- usage(argv[0]);
- ret = acm_domain_setpolicy(acm_cmd_fd);
- } else if (!strcmp(argv[1], "getpolicy")) {
+ if (!strcmp(argv[1], "getpolicy")) {
if (argc != 2)
usage(argv[0]);
ret = acm_domain_getpolicy(acm_cmd_fd);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/sv/inc/style.css
--- a/tools/sv/inc/style.css Wed Aug 24 02:43:18 2005
+++ b/tools/sv/inc/style.css Thu Aug 25 22:53:20 2005
@@ -1,32 +1,95 @@
+.small {
+ font-size: 10px
+}
-P {font-family: verdana, arial; font-size: 12px; color: black}
-.small {font-size: 10px}
+TD.domainInfo {
+ font-size: 10px;
+ color: black
+}
-TD.domainInfo {font-family: verdana, arial; font-size: 10px; color: black}
-TD.domainInfoHead {font-family: verdana, arial; font-size: 10px; color: white;
font-face: bold}
+TD.domainInfoHead {
+ font-size: 10px;
+ color: white;
+ font-weight: bold /* "font-face" is not a CSS property; bold header text was intended */
+}
TD.domainInfoHead {background-color: black}
TR.domainInfoOdd {background-color: white}
TR.domainInfoEven {background-color: lightgrey}
body {
- width: 670px;
- margin: 0px;
- padding: 0px;
- background-color: #fff;
- background-image: url(../images/orb_02.jpg);
- background-repeat: repeat-y;
- background-position: left top;
- font-family: Arial, Helvetica, sans-serif;
- font-weight: bold;
- color: #333333;
- letter-spacing: 0px;
- scrollbar-base-color: #333333;
- scrollbar-track-color: #666666;
- scrollbar-face-color: #fff;
-
-
- }
-
-.button (cursor:hand)
-
+ margin: 0px;
+ padding: 0px;
+ font-family: Arial, Helvetica, sans-serif;
+ font-size: 12px;
+ color: #000000;
+}
+
+div#menu {
+ position: absolute;
+ left: 10px;
+ top: 10px;
+ width: 160px;
+ padding: 10px;
+ border: 0px solid black;
+ text-align: center;
+}
+
+div#main {
+ position: absolute;
+ left: 200px;
+ top: 10px;
+ right: 10px;
+ padding: 10px;
+ border: 0px solid black;
+}
+
+div.button {
+ float: right;
+ margin: 10px 0px 0px 10px;
+ padding: 5px;
+ text-align: center;
+ border: 1px solid black;
+ background: gray;
+ cursor: pointer; /* standard value; "hand" is only understood by old IE */
+}
+
+div.tabButton {
+ position: relative;
+ top: 0px;
+ float: left;
+ margin: 0px 10px -1px 0px;
+ padding: 5px;
+ text-align: center;
+ border: 1px solid black;
+ background: gray;
+ cursor: pointer; /* standard value; "hand" is only understood by old IE */
+}
+
+div.tabButton#activeTab {
+ top: 0px;
+ background: white;
+ border-color: black black white black;
+}
+
+div.button:hover, div.tabButton:hover {
+ background: white;
+}
+
+div.button a, div.tabButton a {
+ font-size: 12px;
+ font-weight: bold;
+}
+
+div.title {
+ float: right;
+ font-size: 14px;
+ font-weight: bold;
+}
+
+div.tab {
+ overflow: auto;
+ clear: both;
+ border: 1px solid black;
+ padding: 10px;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/sv/index.psp
--- a/tools/sv/index.psp Wed Aug 24 02:43:18 2005
+++ b/tools/sv/index.psp Thu Aug 25 22:53:20 2005
@@ -7,158 +7,29 @@
for path in sys.path:
if debug: req.write( path + "<br/>" )
-from xen.sv.HTMLBase import HTMLBase
-from xen.sv.DomList import DomList
-from xen.sv.NodeInfo import NodeInfo
-from xen.sv.DomInfo import DomInfo
-from xen.sv.CreateDomain import CreateDomain
-from xen.sv.MigrateDomain import MigrateDomain
-from xen.sv.SaveDomain import SaveDomain
-from xen.sv.RestoreDomain import RestoreDomain
-
-from xen.xend.XendClient import server
-
-from xen.sv.util import getVar
-
-# adapter to make this all work with mod_python
-# (c) Tom Wilkie 2005
-
-class TwistedAdapter:
-
- def write( self, text ):
- req.write( text )
-
- class Args:
-
- from mod_python.util import FieldStorage
-
- fieldStorage = FieldStorage( req, True )
-
- # return a list of values for the given key,
- # or None if key not there
- def get( self, var ):
- retVar = self.fieldStorage.getlist( var )
- if len( retVar ) == 0:
- return None
- else:
- return retVar
-
- # return a list of tuples,
- # (key, value) where value is a list of values
- def items( self ):
- result = [];
- for key in self.fieldStorage.keys():
- result.append( (key, self.fieldStorage.getlist( key ) ) )
- return result
-
- args = Args()
-
- uri = req.unparsed_uri
-
-
-class Main( HTMLBase ):
-
- isLeaf = True
-
- def __init__( self, urlWriter = None ):
- self.modules = { "node": NodeInfo,
- "list": DomList,
- "info": DomInfo,
- "create": CreateDomain,
- "migrate" : MigrateDomain,
- "save" : SaveDomain,
- "restore" : RestoreDomain }
-
- # ordered list of module menus to display
- self.module_menus = [ "node", "create", "migrate", "save",
- "restore", "list" ]
- HTMLBase.__init__(self)
-
- def render_POST( self, request ):
-
- #decide what module post'd the action
-
- args = getVar( 'args', request )
-
- mod = getVar( 'mod', request )
-
- if mod in self.modules and args is None:
- module = self.modules[ mod ]
- #check module exists
- if module:
- module( self.mainUrlWriter ).perform( request )
- else:
- self.perform( request )
-
- return self.render_GET( request )
-
- #TODO: need to make this get the request uri automatically
- def mainUrlWriter( self, module ):
- def fun( f ):
- return "index.psp?mod=%s%s" % ( module, f )
- return fun
-
- def write_BODY( self, request ):
-
- request.write( "\n<table style='border:0px solid black; background:
url(images/orb_01.jpg) no-repeat' cellspacing='0' cellpadding='0' border='0'
width='780px' height='536px'>\n" )
- request.write( "<tr>\n" )
- request.write( " <td width='15px'> </td>" )
- request.write( " <td width='175px' align='center' valign'center'>" )
- request.write( " <table cellspacing='0' cellpadding='0' border='0'
width='100%' height='100%'>" )
- request.write( " <tr><td height='140px' align='center'
valign='bottom'><a href='http://www.cl.cam.ac.uk/Research/SRG/netos/xen/'>" )
- request.write( " <img src='images/xen.png' width='150' height='75'
border='0'/></a><br/></td></tr>" )
- request.write( " <tr><td height='60px' align='center'><p
class='small'>SV Web Interface<br/>(C) <a href='mailto:tw275@xxxxxxxxx'>Tom
Wilkie</a> 2004</p></td></tr>")
- request.write( " <tr><td align='center' valign='top'>" )
-
- for modName in self.module_menus:
- self.modules[modName]( self.mainUrlWriter( modName ) ).write_MENU(
request )
-
- request.write( " </td></tr>" )
- request.write( " </table>" )
- request.write( " " )
- request.write( " </td>\n" )
- request.write( " <td width='15px'> </td>" )
- request.write( " <td width='558px' align='left' valign='top'>" )
- request.write( " <table cellspacing='0' cellpadding='0' border='0'
width='100%' height='100%'>" )
- request.write( " <tr><td height='20px'></td></tr>" )
- request.write( " <tr><td align='center' valign='top'>" )
-
- modName = getVar('mod', request)
-
- if modName not in self.modules:
- request.write( '<p>Please select a module</p>' )
- else:
- module = self.modules[ modName ]
- if module:
- module( self.mainUrlWriter( modName ) ).write_BODY( request )
- else:
- request.write( '<p>Invalid module. Please select another</p>' )
-
- request.write( " </td></tr>" )
- request.write( " </table>" )
- request.write( " </td>\n" )
- request.write( " <td width='17px'> </td>" )
- request.write( "</tr>\n" )
-
- request.write( "</table>\n" )
-
-
- def op_destroy( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != "0":
- server.xend_domain_destroy( int( dom ), "halt" )
-
- def op_pause( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != "0":
- server.xend_domain_pause( int( dom ) )
-
- def op_unpause( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != "0":
- server.xend_domain_unpause( int( dom ) )
+from xen.sv.Main import Main, TwistedAdapter
main = Main()
-
-main.render_POST( TwistedAdapter() )
+request = TwistedAdapter( req )
+main.do_POST( request )
%>
+<html>
+<head>
+ <title>XenSV</title>
+ <script src="inc/script.js"></script>
+ <link rel="StyleSheet" type="text/css" href="inc/style.css">
+</head>
+<body>
+ <form method="post" action="<%=request.uri%>">
+ <div id="menu">
+ <img src="images/xen.png">
+ <% main.render_menu( request ) %>
+ </div>
+ <div id="main">
+ <% main.render_main( request ) %>
+ </div>
+ <input type="hidden" name="op" value="">
+ <input type="hidden" name="args" value="">
+ </form>
+</body>
+</html>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcs/Makefile
--- a/tools/xcs/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/xcs/Makefile Thu Aug 25 22:53:20 2005
@@ -34,10 +34,10 @@
xcsdump: xcsdump.c dump.c
$(CC) $(CFLAGS) -o xcsdump xcsdump.c -L$(XEN_LIBXC) \
- ctrl_interface.c evtchn.c dump.c -lxc
+ ctrl_interface.c evtchn.c dump.c -lxenctrl
$(BIN): $(OBJS)
- $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -lxc
+ $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -lxenctrl
$(OBJS): $(HDRS)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcs/dump.h
--- a/tools/xcs/dump.h Wed Aug 24 02:43:18 2005
+++ b/tools/xcs/dump.h Thu Aug 25 22:53:20 2005
@@ -20,7 +20,7 @@
#define XENCTLD_ERROR_H
#include <stdint.h>
-#include <xc.h>
+#include <xenctrl.h>
#include <xen/io/domain_controller.h>
void dump_msg(const control_msg_t *msg, uint64_t flags);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcs/xcs.h
--- a/tools/xcs/xcs.h Wed Aug 24 02:43:18 2005
+++ b/tools/xcs/xcs.h Thu Aug 25 22:53:20 2005
@@ -11,7 +11,7 @@
#define __XCS_H__
#include <pthread.h>
-#include <xc.h>
+#include <xenctrl.h>
#include <xen/xen.h>
#include <xen/io/domain_controller.h>
#include <xen/linux/privcmd.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcs/xcsdump.c
--- a/tools/xcs/xcsdump.c Wed Aug 24 02:43:18 2005
+++ b/tools/xcs/xcsdump.c Thu Aug 25 22:53:20 2005
@@ -16,7 +16,7 @@
#include <sys/socket.h>
#include <sys/un.h>
#include <ctype.h>
-#include <xc.h>
+#include <xenctrl.h>
#include <xen/xen.h>
#include <xen/io/domain_controller.h>
#include <getopt.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcutils/Makefile
--- a/tools/xcutils/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/xcutils/Makefile Thu Aug 25 22:53:20 2005
@@ -30,7 +30,7 @@
PROGRAMS = xc_restore xc_save
-LDLIBS = -L$(XEN_LIBXC) -lxc
+LDLIBS = -L$(XEN_LIBXC) -lxenguest -lxenctrl
.PHONY: all
all: build
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c Wed Aug 24 02:43:18 2005
+++ b/tools/xcutils/xc_restore.c Thu Aug 25 22:53:20 2005
@@ -7,24 +7,33 @@
*
*/
+#include <err.h>
#include <stdlib.h>
+#include <stdint.h>
#include <stdio.h>
-#include <err.h>
-#include <xc.h>
+#include <xenguest.h>
int
main(int argc, char **argv)
{
- unsigned int xc_fd, io_fd, domid, nr_pfns;
+ unsigned int xc_fd, io_fd, domid, nr_pfns, evtchn;
+ int ret;
+ unsigned long mfn;
- if (argc != 5)
- errx(1, "usage: %s xcfd iofd domid nr_pfns", argv[0]);
+ if (argc != 6)
+ errx(1, "usage: %s xcfd iofd domid nr_pfns evtchn", argv[0]);
xc_fd = atoi(argv[1]);
io_fd = atoi(argv[2]);
domid = atoi(argv[3]);
nr_pfns = atoi(argv[4]);
+ evtchn = atoi(argv[5]);
- return xc_linux_restore(xc_fd, io_fd, domid, nr_pfns);
+ ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, evtchn, &mfn);
+ if (ret == 0) {
+ printf("store-mfn %lu\n", mfn); /* mfn is unsigned long, so print it unsigned */
+ fflush(stdout);
+ }
+ return ret;
}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c Wed Aug 24 02:43:18 2005
+++ b/tools/xcutils/xc_save.c Thu Aug 25 22:53:20 2005
@@ -7,11 +7,12 @@
*
*/
+#include <err.h>
#include <stdlib.h>
+#include <stdint.h>
#include <stdio.h>
-#include <err.h>
-#include <xc.h>
+#include <xenguest.h>
int
main(int argc, char **argv)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/Makefile Thu Aug 25 22:53:20 2005
@@ -1,6 +1,5 @@
XEN_ROOT=../..
include $(XEN_ROOT)/tools/Rules.mk
-LIBDIR = lib
XEN_LIBXC = $(XEN_ROOT)/tools/libxc
INSTALL = install
@@ -25,7 +24,7 @@
TESTFLAGS= -DTESTING
TESTENV = XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR)
-all: xen xenstored libxenstore.a libxenstore-pic.a
+all: xen xenstored libxenstore.so
testcode: xen xs_test xenstored_test xs_random xs_dom0_test
@@ -33,7 +32,7 @@
ln -sf $(XEN_ROOT)/xen/include/public $@
xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o
xenstored_transaction.o xs_lib.o talloc.o utils.o
- $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxc -o $@
+ $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
xenstored_test: xenstored_core_test.o xenstored_watch_test.o
xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o
fake_libxc.o utils.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
@@ -41,9 +40,9 @@
xs_test: xs_test.o xs_lib.o utils.o
xs_random: xs_random.o xs_test_lib.o xs_lib.o talloc.o utils.o
xs_stress: xs_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o
-xs_watch_stress: xs_watch_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o
+xs_crashme: xs_crashme.o xs_lib.o talloc.o utils.o
-xs_test.o xs_stress.o xs_watch_stress.o xenstored_core_test.o
xenstored_watch_test.o xenstored_transaction_test.o xenstored_domain_test.o
xs_random.o xs_test_lib.o talloc_test.o fake_libxc.o: CFLAGS=$(BASECFLAGS)
$(TESTFLAGS)
+xs_test.o xs_stress.o xenstored_core_test.o xenstored_watch_test.o
xenstored_transaction_test.o xenstored_domain_test.o xs_random.o xs_test_lib.o
talloc_test.o fake_libxc.o xs_crashme.o: CFLAGS=$(BASECFLAGS) $(TESTFLAGS)
xenstored_%_test.o: xenstored_%.c
$(COMPILE.c) -o $@ $<
@@ -54,25 +53,30 @@
talloc_test.o: talloc.c
$(COMPILE.c) -o $@ $<
-LIB_OBJS := xs.o xs_lib.o
-
-LIB_OBJS_A := $(patsubst %.o,libxenstore.a(%.o),$(LIB_OBJS))
-LIB_OBJS_PIC := $(patsubst %.o,libxenstore-pic.a(%.opic),$(LIB_OBJS))
-
-libxenstore.a: $(LIB_OBJS_A)
-
-libxenstore-pic.a: $(LIB_OBJS_PIC)
+libxenstore.so: xs.opic xs_lib.opic
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so -shared -o $@
$^
clean: testsuite-clean
- rm -f *.o *.opic *.a
- rm -f xen xenstored xs_random xs_stress xs_watch_stress
+ rm -f *.o *.opic *.so
+ rm -f xen xenstored xs_random xs_stress xs_crashme
rm -f xs_test xenstored_test xs_dom0_test
- -$(RM) $(PROG_DEP)
+ $(RM) $(PROG_DEP)
-check: testsuite-run randomcheck stresstest
+print-dir:
+ @echo -n tools/xenstore:
+
+print-end:
+ @echo
+
+check: print-dir testsuite-fast randomcheck-fast print-end
+
+fullcheck: testsuite-run randomcheck stresstest
testsuite-run: xen xenstored_test xs_test
- $(TESTENV) testsuite/test.sh
+ $(TESTENV) testsuite/test.sh && echo
+
+testsuite-fast: xen xenstored_test xs_test
+ @$(TESTENV) testsuite/test.sh --fast
testsuite-clean:
rm -rf $(TESTDIR)
@@ -81,18 +85,25 @@
# fail.
RANDSEED=$(shell date +%s)
randomcheck: xs_random xenstored_test
- $(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000 $(RANDSEED)
- $(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED)
+ $(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000
$(RANDSEED) && echo
+ $(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED) && echo
$(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED)
-stresstest: xs_stress xs_watch_stress xenstored_test
+crashme: xs_crashme xenstored_test
+ rm -rf $(TESTDIR)/store $(TESTDIR)/transactions /tmp/xs_crashme.vglog*
/tmp/trace
+ export $(TESTENV); ./xs_crashme 5000 $(RANDSEED) 2>/dev/null
+ if [ -n "`cat /tmp/xs_crashme.vglog*`" ]; then echo Valgrind
complained; cat /tmp/xs_crashme.vglog*; exit 1; fi
+ rm -rf $(TESTDIR)/store $(TESTDIR)/transactions /tmp/xs_crashme.vglog*
/tmp/trace
+
+randomcheck-fast: xs_random xenstored_test
+ @$(TESTENV) ./xs_random --fast /tmp/xs_random 2000 $(RANDSEED)
+
+stresstest: xs_stress xenstored_test
rm -rf $(TESTDIR)/store $(TESTDIR)/transactions
export $(TESTENV); PID=`./xenstored_test --output-pid
--trace-file=/tmp/trace`; ./xs_stress 5000; ret=$$?; kill $$PID; exit $$ret
- rm -rf $(TESTDIR)/store $(TESTDIR)/transactions
- export $(TESTENV); PID=`./xenstored_test --output-pid`;
./xs_watch_stress; ret=$$?; kill $$PID; exit $$ret
xs_dom0_test: xs_dom0_test.o utils.o
- $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxc -o $@
+ $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
TAGS:
etags `find . -name '*.[ch]'`
@@ -100,15 +111,14 @@
tarball: clean
cd .. && tar -c -j -v -h -f xenstore.tar.bz2 xenstore/
-install: xenstored libxenstore.a libxenstore-pic.a
+install: xenstored libxenstore.so
$(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored
$(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored
$(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
$(INSTALL_DIR) -p $(DESTDIR)/usr/include
$(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin
$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
- $(INSTALL_DATA) libxenstore.a $(DESTDIR)/usr/$(LIBDIR)
- $(INSTALL_DATA) libxenstore-pic.a $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxenstore.so $(DESTDIR)/usr/$(LIBDIR)
$(INSTALL_DATA) xs.h $(DESTDIR)/usr/include
$(INSTALL_DATA) xs_lib.h $(DESTDIR)/usr/include
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/test.sh
--- a/tools/xenstore/testsuite/test.sh Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/test.sh Thu Aug 25 22:53:20 2005
@@ -7,20 +7,20 @@
{
rm -rf $XENSTORED_ROOTDIR
mkdir $XENSTORED_ROOTDIR
-# Weird failures with this.
- if type valgrind >/dev/null 2>&1; then
- valgrind -q --logfile-fd=3 ./xenstored_test --output-pid
--trace-file=testsuite/tmp/trace --no-fork 3>testsuite/tmp/vgout > /tmp/pid 2>
testsuite/tmp/xenstored_errors &
+ if [ $VALGRIND -eq 1 ]; then
+ valgrind --suppressions=testsuite/vg-suppressions -q ./xenstored_test
--output-pid --trace-file=testsuite/tmp/trace --no-fork > /tmp/pid 2>
testsuite/tmp/xenstored_errors &
while [ ! -s /tmp/pid ]; do sleep 0; done
PID=`cat /tmp/pid`
rm /tmp/pid
else
- PID=`./xenstored_test --output-pid`
+ # We don't get error messages from this, though.
+ PID=`./xenstored_test --output-pid --trace-file=testsuite/tmp/trace`
fi
- if sh -e $2 $1; then
- if [ -s testsuite/tmp/vgout ]; then
+ if ./xs_test $2 $1; then
+ if [ -s testsuite/tmp/xenstored_errors ]; then
kill $PID
- echo VALGRIND errors:
- cat testsuite/tmp/vgout
+ echo Errors:
+ cat testsuite/tmp/xenstored_errors
return 1
fi
echo shutdown | ./xs_test
@@ -33,15 +33,29 @@
fi
}
+if [ x$1 = x--fast ]; then
+ VALGRIND=0
+ SLOWTESTS=""
+ shift
+else
+ if type valgrind >/dev/null 2>&1; then
+ VALGRIND=1
+ else
+ echo "WARNING: valgrind not available" >&2
+ VALGRIND=0
+ fi
+ SLOWTESTS=testsuite/[0-9]*.slowtest
+fi
+
MATCH=${1:-"*"}
-for f in testsuite/[0-9]*.sh; do
+for f in testsuite/[0-9]*.test $SLOWTESTS; do
case `basename $f` in $MATCH) RUN=1;; esac
[ -n "$RUN" ] || continue
- if run_test $f; then
- echo Test $f passed...
+
+ if run_test $f -x >/tmp/out; then
+ echo -n .
else
- echo Test $f failed, running verbosely...
- run_test $f -x || true
+ cat /tmp/out
# That will have filled the screen, repeat message.
echo Test $f failed
exit 1
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/utils.c
--- a/tools/xenstore/utils.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/utils.c Thu Aug 25 22:53:20 2005
@@ -80,30 +80,6 @@
barf("malloc of %zu failed", size);
}
-/* Stevens. */
-void daemonize(void)
-{
- pid_t pid;
-
- /* Separate from our parent via fork, so init inherits us. */
- if ((pid = fork()) < 0)
- barf_perror("Failed to fork daemon");
- if (pid != 0)
- exit(0);
-
- close(STDIN_FILENO);
- close(STDOUT_FILENO);
- close(STDERR_FILENO);
-
- /* Session leader so ^C doesn't whack us. */
- setsid();
- /* Move off any mount points we might be in. */
- chdir("/");
- /* Discard our parent's old-fashioned umask prejudices. */
- umask(0);
-}
-
-
/* This version adds one byte (for nul term) */
void *grab_file(const char *filename, unsigned long *size)
{
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/utils.h
--- a/tools/xenstore/utils.h Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/utils.h Thu Aug 25 22:53:20 2005
@@ -40,9 +40,6 @@
void *grab_file(const char *filename, unsigned long *size);
void release_file(void *data, unsigned long size);
-/* For writing daemons, based on Stevens. */
-void daemonize(void);
-
/* Signal handling: returns fd to listen on. */
int signal_to_fd(int signal);
void close_signal(int fd);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored.h
--- a/tools/xenstore/xenstored.h Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored.h Thu Aug 25 22:53:20 2005
@@ -1,21 +1,29 @@
-/*
- Simple prototyle Xen Store Daemon providing simple tree-like database.
- Copyright (C) 2005 Rusty Russell IBM Corporation
+/*
+ * Simple prototype Xen Store Daemon providing simple tree-like database.
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
#ifndef _XENSTORED_H
#define _XENSTORED_H
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_core.c Thu Aug 25 22:53:20 2005
@@ -252,6 +252,7 @@
int ret;
struct buffered_data *out = conn->out;
+ assert(conn->state != BLOCKED);
if (out->inhdr) {
if (verbose)
xprintf("Writing msg %s (%s) out to %p\n",
@@ -289,6 +290,10 @@
talloc_free(out);
queue_next_event(conn);
+
+ /* No longer busy? */
+ if (!conn->out)
+ conn->state = OK;
return true;
}
@@ -418,14 +423,24 @@
return node_dir_inside_transaction(trans, node);
}
+static char *datafile(const char *dir)
+{
+ return talloc_asprintf(dir, "%s/.data", dir);
+}
+
static char *node_datafile(struct transaction *trans, const char *node)
{
- return talloc_asprintf(node, "%s/.data", node_dir(trans, node));
+ return datafile(node_dir(trans, node));
+}
+
+static char *permfile(const char *dir)
+{
+ return talloc_asprintf(dir, "%s/.perms", dir);
}
static char *node_permfile(struct transaction *trans, const char *node)
{
- return talloc_asprintf(node, "%s/.perms", node_dir(trans, node));
+ return permfile(node_dir(trans, node));
}
struct buffered_data *new_buffer(void *ctx)
@@ -492,6 +507,8 @@
conn->waiting_reply = bdata;
} else
conn->out = bdata;
+ assert(conn->state != BLOCKED);
+ conn->state = BUSY;
}
/* Some routines (write, mkdir, etc) just need a non-error return */
@@ -504,11 +521,13 @@
{
unsigned int i;
- for (i = 0; error != xsd_errors[i].errnum; i++)
- if (i == ARRAY_SIZE(xsd_errors) - 1)
- corrupt(conn, "Unknown error %i (%s)", error,
- strerror(error));
-
+ for (i = 0; error != xsd_errors[i].errnum; i++) {
+ if (i == ARRAY_SIZE(xsd_errors) - 1) {
+ eprintf("xenstored: error %i untranslatable", error);
+ i = 0; /* EINVAL */
+ break;
+ }
+ }
send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
strlen(xsd_errors[i].errstring) + 1);
}
@@ -542,21 +561,20 @@
/* We expect one arg in the input: return NULL otherwise. */
static const char *onearg(struct buffered_data *in)
{
- if (get_string(in, 0) != in->used)
+ if (!in->used || get_string(in, 0) != in->used)
return NULL;
return in->buffer;
}
/* If it fails, returns NULL and sets errno. */
-static struct xs_permissions *get_perms(struct transaction *transaction,
- const char *node, unsigned int *num)
+static struct xs_permissions *get_perms(const char *dir, unsigned int *num)
{
unsigned int size;
char *strings;
struct xs_permissions *ret;
int *fd;
- fd = talloc_open(node_permfile(transaction, node), O_RDONLY, 0);
+ fd = talloc_open(permfile(dir), O_RDONLY, 0);
if (!fd)
return NULL;
strings = read_all(fd, &size);
@@ -564,14 +582,14 @@
return NULL;
*num = xs_count_strings(strings, size);
- ret = talloc_array(node, struct xs_permissions, *num);
+ ret = talloc_array(dir, struct xs_permissions, *num);
if (!xs_strings_to_perms(ret, *num, strings))
- corrupt(NULL, "Permissions corrupt for %s", node);
+ corrupt(NULL, "Permissions corrupt for %s", dir);
return ret;
}
-static char *perms_to_strings(const char *node,
+static char *perms_to_strings(const void *ctx,
struct xs_permissions *perms, unsigned int num,
unsigned int *len)
{
@@ -583,7 +601,7 @@
if (!xs_perm_to_string(&perms[i], buffer))
return NULL;
- strings = talloc_realloc(node, strings, char,
+ strings = talloc_realloc(ctx, strings, char,
*len + strlen(buffer) + 1);
strcpy(strings + *len, buffer);
*len += strlen(buffer) + 1;
@@ -616,16 +634,23 @@
return 0;
}
+/* Create a self-destructing temporary path */
+static char *temppath(const char *path)
+{
+ char *tmppath = talloc_asprintf(path, "%s.tmp", path);
+ talloc_set_destructor(tmppath, destroy_path);
+ return tmppath;
+}
+
/* Create a self-destructing temporary file */
static char *tempfile(const char *path, void *contents, unsigned int len)
{
int *fd;
- char *tmppath = talloc_asprintf(path, "%s.tmp", path);
+ char *tmppath = temppath(path);
fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640);
if (!fd)
return NULL;
- talloc_set_destructor(tmppath, destroy_path);
if (!xs_write_all(*fd, contents, len))
return NULL;
@@ -705,44 +730,50 @@
/* Owners and tools get it all... */
if (!id || perms[0].id == id)
- return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_CREATE|XS_PERM_OWNER;
+ return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
for (i = 1; i < num; i++)
if (perms[i].id == id)
return perms[i].perms;
return perms[0].perms;
+}
+
+/* What do parents say? */
+static enum xs_perm_type ask_parents(struct connection *conn,
+ const char *node)
+{
+ struct xs_permissions *perms;
+ unsigned int num;
+
+ do {
+ node = get_parent(node);
+ perms = get_perms(node_dir(conn->transaction, node), &num);
+ if (perms)
+ break;
+ } while (!streq(node, "/"));
+
+ /* No permission at root? We're in trouble. */
+ if (!perms)
+ corrupt(conn, "No permissions file at root");
+
+ return perm_for_id(conn->id, perms, num);
}
/* We have a weird permissions system. You can allow someone into a
* specific node without allowing it in the parents. If it's going to
* fail, however, we don't want the errno to indicate any information
* about the node. */
-static int check_with_parents(struct connection *conn, const char *node,
+static int errno_from_parents(struct connection *conn, const char *node,
int errnum)
{
- struct xs_permissions *perms;
- unsigned int num;
-
/* We always tell them about memory failures. */
if (errnum == ENOMEM)
return errnum;
- do {
- node = get_parent(node);
- perms = get_perms(conn->transaction, node, &num);
- if (perms)
- break;
- } while (!streq(node, "/"));
-
- /* No permission at root? We're in trouble. */
- if (!perms)
- corrupt(conn, "No permissions file at root");
-
- if (!(perm_for_id(conn->id, perms, num) & XS_PERM_READ))
- return EACCES;
-
- return errnum;
+ if (ask_parents(conn, node) & XS_PERM_READ)
+ return errnum;
+ return EACCES;
}
char *canonicalize(struct connection *conn, const char *node)
@@ -773,31 +804,33 @@
return false;
}
- perms = get_perms(conn->transaction, node, &num);
- /* No permissions. If we want to create it and
- * it doesn't exist, check parent directory. */
- if (!perms && errno == ENOENT && (perm & XS_PERM_CREATE)) {
- char *parent = get_parent(node);
- if (!parent)
- return false;
-
- perms = get_perms(conn->transaction, parent, &num);
- }
- if (!perms) {
- errno = check_with_parents(conn, node, errno);
+ perms = get_perms(node_dir(conn->transaction, node), &num);
+
+ if (perms) {
+ if (perm_for_id(conn->id, perms, num) & perm)
+ return true;
+ errno = EACCES;
return false;
}
- if (perm_for_id(conn->id, perms, num) & perm)
- return true;
-
- errno = check_with_parents(conn, node, EACCES);
+ /* If it's OK not to exist, we consult parents. */
+ if (errno == ENOENT && (perm & XS_PERM_ENOENT_OK)) {
+ if (ask_parents(conn, node) & perm)
+ return true;
+ /* Parents say they should not know. */
+ errno = EACCES;
+ return false;
+ }
+
+ /* They might not have permission to even *see* this node, in
+ * which case we return EACCES even if it's ENOENT or EIO. */
+ errno = errno_from_parents(conn, node, errno);
return false;
}
static void send_directory(struct connection *conn, const char *node)
{
- char *path, *reply = talloc_strdup(node, "");
+ char *path, *reply;
unsigned int reply_len = 0;
DIR **dir;
struct dirent *dirent;
@@ -815,6 +848,7 @@
return;
}
+ reply = talloc_strdup(node, "");
while ((dirent = readdir(*dir)) != NULL) {
int len = strlen(dirent->d_name) + 1;
@@ -857,44 +891,64 @@
send_reply(conn, XS_READ, value, size);
}
-/* Create a new directory. Optionally put data in it (if data != NULL) */
-static bool new_directory(struct connection *conn,
- const char *node, void *data, unsigned int datalen)
+/* Commit this directory, e.g. committing a/b.tmp/c causes a/b.tmp -> a/b */
+static bool commit_dir(char *dir)
+{
+ char *dot, *slash, *dest;
+
+ dot = strrchr(dir, '.');
+ slash = strchr(dot, '/');
+ if (slash)
+ *slash = '\0';
+
+ dest = talloc_asprintf(dir, "%.*s", dot - dir, dir);
+ return rename(dir, dest) == 0;
+}
+
+/* Create a temporary directory. Put data in it (if data != NULL) */
+static char *tempdir(struct connection *conn,
+ const char *node, void *data, unsigned int datalen)
{
struct xs_permissions *perms;
char *permstr;
unsigned int num, len;
int *fd;
- char *dir = node_dir(conn->transaction, node);
-
- if (mkdir(dir, 0750) != 0)
- return false;
-
- /* Set destructor so we clean up if neccesary. */
- talloc_set_destructor(dir, destroy_path);
-
- perms = get_perms(conn->transaction, get_parent(node), &num);
+ char *dir;
+
+ dir = temppath(node_dir(conn->transaction, node));
+ if (mkdir(dir, 0750) != 0) {
+ if (errno != ENOENT)
+ return NULL;
+
+ dir = tempdir(conn, get_parent(node), NULL, 0);
+ if (!dir)
+ return NULL;
+
+ dir = talloc_asprintf(dir, "%s%s", dir, strrchr(node, '/'));
+ if (mkdir(dir, 0750) != 0)
+ return NULL;
+ talloc_set_destructor(dir, destroy_path);
+ }
+
+ perms = get_perms(get_parent(dir), &num);
+ assert(perms);
/* Domains own what they create. */
if (conn->id)
perms->id = conn->id;
permstr = perms_to_strings(dir, perms, num, &len);
- fd = talloc_open(node_permfile(conn->transaction, node),
- O_WRONLY|O_CREAT|O_EXCL, 0640);
+ fd = talloc_open(permfile(dir), O_WRONLY|O_CREAT|O_EXCL, 0640);
if (!fd || !xs_write_all(*fd, permstr, len))
- return false;
+ return NULL;
if (data) {
- char *datapath = node_datafile(conn->transaction, node);
+ char *datapath = datafile(dir);
fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
if (!fd || !xs_write_all(*fd, data, datalen))
- return false;
- }
-
- /* Finished! */
- talloc_set_destructor(dir, NULL);
- return true;
+ return NULL;
+ }
+ return dir;
}
/* path, flags, data... */
@@ -913,8 +967,7 @@
}
node = canonicalize(conn, vec[0]);
- if (/*suppress error on write outside transaction*/ 0 &&
- !within_transaction(conn->transaction, node)) {
+ if (!within_transaction(conn->transaction, node)) {
send_error(conn, EROFS);
return;
}
@@ -928,9 +981,9 @@
if (streq(vec[1], XS_WRITE_NONE))
mode = XS_PERM_WRITE;
else if (streq(vec[1], XS_WRITE_CREATE))
- mode = XS_PERM_WRITE|XS_PERM_CREATE;
+ mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
- mode = XS_PERM_WRITE|XS_PERM_CREATE;
+ mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
else {
send_error(conn, EINVAL);
return;
@@ -942,6 +995,8 @@
}
if (lstat(node_dir(conn->transaction, node), &st) != 0) {
+ char *dir;
+
/* Does not exist... */
if (errno != ENOENT) {
send_error(conn, errno);
@@ -949,15 +1004,17 @@
}
/* Not going to create it? */
- if (!(mode & XS_PERM_CREATE)) {
+ if (streq(vec[1], XS_WRITE_NONE)) {
send_error(conn, ENOENT);
return;
}
- if (!new_directory(conn, node, in->buffer + offset, datalen)) {
+ dir = tempdir(conn, node, in->buffer + offset, datalen);
+ if (!dir || !commit_dir(dir)) {
send_error(conn, errno);
return;
}
+
} else {
/* Exists... */
if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
@@ -982,8 +1039,11 @@
static void do_mkdir(struct connection *conn, const char *node)
{
+ char *dir;
+ struct stat st;
+
node = canonicalize(conn, node);
- if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE)) {
+ if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
send_error(conn, errno);
return;
}
@@ -996,7 +1056,14 @@
if (transaction_block(conn, node))
return;
- if (!new_directory(conn, node, NULL, 0)) {
+ /* Must not already exist. */
+ if (lstat(node_dir(conn->transaction, node), &st) == 0) {
+ send_error(conn, EEXIST);
+ return;
+ }
+
+ dir = tempdir(conn, node, NULL, 0);
+ if (!dir || !commit_dir(dir)) {
send_error(conn, errno);
return;
}
@@ -1056,7 +1123,7 @@
return;
}
- perms = get_perms(conn->transaction, node, &num);
+ perms = get_perms(node_dir(conn->transaction, node), &num);
if (!perms) {
send_error(conn, errno);
return;
@@ -1072,7 +1139,7 @@
static void do_set_perms(struct connection *conn, struct buffered_data *in)
{
unsigned int num;
- char *node;
+ char *node, *permstr;
struct xs_permissions *perms;
num = xs_count_strings(in->buffer, in->used);
@@ -1083,7 +1150,7 @@
/* First arg is node name. */
node = canonicalize(conn, in->buffer);
- in->buffer += strlen(in->buffer) + 1;
+ permstr = in->buffer + strlen(in->buffer) + 1;
num--;
if (!within_transaction(conn->transaction, node)) {
@@ -1101,7 +1168,7 @@
}
perms = talloc_array(node, struct xs_permissions, num);
- if (!xs_strings_to_perms(perms, num, in->buffer)) {
+ if (!xs_strings_to_perms(perms, num, permstr)) {
send_error(conn, errno);
return;
}
@@ -1270,8 +1337,10 @@
talloc_free(in);
talloc_set_fail_handler(NULL, NULL);
if (talloc_total_blocks(NULL)
- != talloc_total_blocks(talloc_autofree_context()) + 1)
+ != talloc_total_blocks(talloc_autofree_context()) + 1) {
talloc_report_full(NULL, stderr);
+ abort();
+ }
}
/* Errors in reading or allocating here mean we get out of sync, so we
@@ -1295,8 +1364,10 @@
return;
if (in->hdr.msg.len > PATH_MAX) {
+#ifndef TESTING
syslog(LOG_DAEMON, "Client tried to feed us %i",
in->hdr.msg.len);
+#endif
goto bad_client;
}
@@ -1347,6 +1418,7 @@
consider_message(i);
}
break;
+ case BUSY:
case OK:
break;
}
@@ -1372,6 +1444,7 @@
new->state = OK;
new->blocked_by = NULL;
new->out = new->waiting_reply = NULL;
+ new->waiting_for_ack = NULL;
new->fd = -1;
new->id = 0;
new->domain = NULL;
@@ -1451,6 +1524,7 @@
printf(" state = %s\n",
i->state == OK ? "OK"
: i->state == BLOCKED ? "BLOCKED"
+ : i->state == BUSY ? "BUSY"
: "INVALID");
if (i->id)
printf(" id = %i\n", i->id);
@@ -1516,19 +1590,59 @@
xs_daemon_transactions());
}
+static void write_pidfile(const char *pidfile)
+{
+ char buf[100];
+ int len;
+ int fd;
+
+ fd = open(pidfile, O_RDWR | O_CREAT, 0600);
+ if (fd == -1)
+ barf_perror("Opening pid file %s", pidfile);
+
+ /* We exit silently if daemon already running. */
+ if (lockf(fd, F_TLOCK, 0) == -1)
+ exit(0);
+
+ len = sprintf(buf, "%d\n", getpid());
+ write(fd, buf, len);
+}
+
+/* Stevens. */
+static void daemonize(void)
+{
+ pid_t pid;
+
+ /* Separate from our parent via fork, so init inherits us. */
+ if ((pid = fork()) < 0)
+ barf_perror("Failed to fork daemon");
+ if (pid != 0)
+ exit(0);
+
+ /* Session leader so ^C doesn't whack us. */
+ setsid();
+ /* Move off any mount points we might be in. */
+ chdir("/");
+ /* Discard our parent's old-fashioned umask prejudices. */
+ umask(0);
+}
+
+
static struct option options[] = { { "no-fork", 0, NULL, 'N' },
{ "verbose", 0, NULL, 'V' },
{ "output-pid", 0, NULL, 'P' },
{ "trace-file", 1, NULL, 'T' },
+ { "pid-file", 1, NULL, 'F' },
{ NULL, 0, NULL, 0 } };
int main(int argc, char *argv[])
{
- int opt, *sock, *ro_sock, event_fd, max, tmpout;
+ int opt, *sock, *ro_sock, event_fd, max;
struct sockaddr_un addr;
fd_set inset, outset;
bool dofork = true;
bool outputpid = false;
+ const char *pidfile = NULL;
while ((opt = getopt_long(argc, argv, "DVT:", options, NULL)) != -1) {
switch (opt) {
@@ -1548,10 +1662,19 @@
optarg);
write(tracefd, "\n***\n", strlen("\n***\n"));
break;
+ case 'F':
+ pidfile = optarg;
}
}
if (optind != argc)
barf("%s: No arguments desired", argv[0]);
+
+ if (dofork) {
+ openlog("xenstored", 0, LOG_DAEMON);
+ daemonize();
+ }
+ if (pidfile)
+ write_pidfile(pidfile);
talloc_enable_leak_report_full();
@@ -1599,19 +1722,17 @@
/* Restore existing connections. */
restore_existing_connections();
- /* Debugging: daemonize() closes standard fds, so dup here. */
- tmpout = dup(STDOUT_FILENO);
+ if (outputpid) {
+ printf("%i\n", getpid());
+ fflush(stdout);
+ }
+
+ /* close stdin/stdout/stderr now that we're ready to accept connections */
if (dofork) {
- openlog("xenstored", 0, LOG_DAEMON);
- daemonize();
- }
-
- if (outputpid) {
- char buffer[20];
- sprintf(buffer, "%i\n", getpid());
- write(tmpout, buffer, strlen(buffer));
- }
- close(tmpout);
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+ }
#ifdef TESTING
signal(SIGUSR1, stop_failtest);
@@ -1621,6 +1742,7 @@
max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd);
/* Main loop. */
+ /* FIXME: Rewrite so no one can starve. */
for (;;) {
struct connection *i;
struct timeval *tvp = NULL, tv;
@@ -1665,10 +1787,22 @@
}
}
- /* Flush output for domain connections, */
- list_for_each_entry(i, &connections, list)
- if (i->domain && i->out)
+ /* Handle all possible I/O for domain connections. */
+ more:
+ list_for_each_entry(i, &connections, list) {
+ if (!i->domain)
+ continue;
+
+ if (domain_can_read(i)) {
+ handle_input(i);
+ goto more;
+ }
+
+ if (domain_can_write(i)) {
handle_output(i);
+ goto more;
+ }
+ }
if (tvp) {
check_transaction_timeout();
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_core.h Thu Aug 25 22:53:20 2005
@@ -51,6 +51,8 @@
{
/* Blocked by transaction. */
BLOCKED,
+ /* Doing action, not listening */
+ BUSY,
/* Completed */
OK,
};
@@ -65,7 +67,7 @@
/* Who am I? 0 for socket connections. */
domid_t id;
- /* Blocked on transaction? */
+ /* Blocked on transaction? Busy? */
enum state state;
/* Node we are waiting for (if state == BLOCKED) */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_domain.c Thu Aug 25 22:53:20 2005
@@ -227,32 +227,27 @@
return NULL;
}
+/* We scan all domains rather than use the information given here. */
void handle_event(int event_fd)
{
u16 port;
- struct domain *domain;
if (read(event_fd, &port, sizeof(port)) != sizeof(port))
barf_perror("Failed to read from event fd");
-
- /* We have to handle *all* the data available before we ack:
- * careful that handle_input/handle_output can destroy conn.
- */
- while ((domain = find_domain(port)) != NULL) {
- if (domain->conn->state == OK
- && buffer_has_input(domain->input))
- handle_input(domain->conn);
- else if (domain->conn->out
- && buffer_has_output_room(domain->output))
- handle_output(domain->conn);
- else
- break;
- }
-
#ifndef TESTING
if (write(event_fd, &port, sizeof(port)) != sizeof(port))
barf_perror("Failed to write to event fd");
#endif
+}
+
+bool domain_can_read(struct connection *conn)
+{
+ return conn->state == OK && buffer_has_input(conn->domain->input);
+}
+
+bool domain_can_write(struct connection *conn)
+{
+ return conn->out && buffer_has_output_room(conn->domain->output);
}
static struct domain *new_domain(void *context, domid_t domid,
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_domain.h
--- a/tools/xenstore/xenstored_domain.h Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_domain.h Thu Aug 25 22:53:20 2005
@@ -40,4 +40,8 @@
/* Read existing connection information from store. */
void restore_existing_connections(void);
+/* Can the connection attached to a domain read/write? */
+bool domain_can_read(struct connection *conn);
+bool domain_can_write(struct connection *conn);
+
#endif /* _XENSTORED_DOMAIN_H */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_watch.c
--- a/tools/xenstore/xenstored_watch.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_watch.c Thu Aug 25 22:53:20 2005
@@ -95,9 +95,18 @@
return 0;
}
-static void add_event(struct watch *watch, const char *node)
+static void add_event(struct connection *conn,
+ struct watch *watch, const char *node)
{
struct watch_event *event;
+
+ /* Check read permission: no permission, no watch event.
+ * If it doesn't exist, we need permission to read parent.
+ */
+ if (!check_node_perms(conn, node, XS_PERM_READ|XS_PERM_ENOENT_OK)) {
+ fprintf(stderr, "No permission for %s\n", node);
+ return;
+ }
if (watch->relative_path) {
node += strlen(watch->relative_path);
@@ -132,9 +141,9 @@
list_for_each_entry(watch, &i->watches, list) {
if (is_child(node, watch->node))
- add_event(watch, node);
+ add_event(i, watch, node);
else if (recurse && is_child(watch->node, node))
- add_event(watch, watch->node);
+ add_event(i, watch, watch->node);
else
continue;
/* If connection not doing anything, queue this. */
@@ -206,7 +215,7 @@
relative = !strstarts(vec[0], "/");
vec[0] = canonicalize(conn, vec[0]);
- if (!check_node_perms(conn, vec[0], XS_PERM_READ)) {
+ if (!is_valid_nodename(vec[0])) {
send_error(conn, errno);
return;
}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs.c Thu Aug 25 22:53:20 2005
@@ -2,19 +2,19 @@
Xen Store Daemon interface providing simple tree-like database.
Copyright (C) 2005 Rusty Russell IBM Corporation
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <sys/types.h>
@@ -204,13 +204,19 @@
return NULL;
}
- assert(msg.type == type);
+ if (msg.type != type) {
+ free(ret);
+ saved_errno = EBADF;
+ goto close_fd;
+
+ }
return ret;
fail:
/* We're in a bad state, so close fd. */
saved_errno = errno;
sigaction(SIGPIPE, &oldact, NULL);
+close_fd:
close(h->fd);
h->fd = -1;
errno = saved_errno;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs.h
--- a/tools/xenstore/xs.h Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs.h Thu Aug 25 22:53:20 2005
@@ -2,19 +2,19 @@
Xen Store Daemon providing simple tree-like database.
Copyright (C) 2005 Rusty Russell IBM Corporation
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _XS_H
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_dom0_test.c
--- a/tools/xenstore/xs_dom0_test.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_dom0_test.c Thu Aug 25 22:53:20 2005
@@ -3,7 +3,7 @@
#include <sys/ioctl.h>
#include "xs.h"
#include "utils.h"
-#include <xc.h>
+#include <xenctrl.h>
#include <xen/linux/privcmd.h>
#include <stdio.h>
#include <unistd.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_lib.c
--- a/tools/xenstore/xs_lib.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_lib.c Thu Aug 25 22:53:20 2005
@@ -1,3 +1,22 @@
+/*
+ Common routines between Xen store user library and daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
#include "xs_lib.h"
#include <unistd.h>
#include <stdio.h>
@@ -133,8 +152,9 @@
unsigned int num;
const char *p;
- for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
- num++;
+ for (p = strings, num = 0; p < strings + len; p++)
+ if (*p == '\0')
+ num++;
return num;
}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_lib.h
--- a/tools/xenstore/xs_lib.h Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_lib.h Thu Aug 25 22:53:20 2005
@@ -2,19 +2,19 @@
Common routines between Xen store user library and daemon.
Copyright (C) 2005 Rusty Russell IBM Corporation
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _XS_LIB_H
@@ -22,7 +22,7 @@
#include <stdbool.h>
#include <limits.h>
-#include <xc.h>
+#include <xenctrl.h>
/* Bitmask of permissions. */
enum xs_perm_type {
@@ -30,7 +30,7 @@
XS_PERM_READ = 1,
XS_PERM_WRITE = 2,
/* Internal use. */
- XS_PERM_CREATE = 4,
+ XS_PERM_ENOENT_OK = 4,
XS_PERM_OWNER = 8,
};
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_random.c
--- a/tools/xenstore/xs_random.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_random.c Thu Aug 25 22:53:20 2005
@@ -303,6 +303,34 @@
return true;
}
+static char *parent_filename(const char *name)
+{
+ char *slash = strrchr(name + 1, '/');
+ if (!slash)
+ return talloc_strdup(name, "/");
+ return talloc_asprintf(name, "%.*s", slash-name, name);
+}
+
+static void make_dirs(const char *filename)
+{
+ struct stat st;
+
+ if (lstat(filename, &st) == 0 && S_ISREG(st.st_mode))
+ convert_to_dir(filename);
+
+ if (mkdir(filename, 0700) == 0) {
+ init_perms(filename);
+ return;
+ }
+ if (errno == EEXIST)
+ return;
+
+ make_dirs(parent_filename(filename));
+ if (mkdir(filename, 0700) != 0)
+ barf_perror("Failed to mkdir %s", filename);
+ init_perms(filename);
+}
+
static bool file_write(struct file_ops_info *info,
const char *path, const void *data,
unsigned int len, int createflags)
@@ -329,6 +357,9 @@
}
}
+ if (createflags & O_CREAT)
+ make_dirs(parent_filename(filename));
+
fd = open(filename, createflags|O_TRUNC|O_WRONLY, 0600);
if (fd < 0) {
/* FIXME: Another hack. */
@@ -349,19 +380,13 @@
{
char *dirname = path_to_name(info, path);
- /* Same effective order as daemon, so error returns are right. */
- if (mkdir(dirname, 0700) != 0) {
- if (errno != ENOENT && errno != ENOTDIR)
- write_ok(info, path);
- return false;
- }
-
- if (!write_ok(info, path)) {
- int saved_errno = errno;
- rmdir(dirname);
- errno = saved_errno;
- return false;
- }
+ if (!write_ok(info, path))
+ return false;
+
+ make_dirs(parent_filename(dirname));
+ if (mkdir(dirname, 0700) != 0)
+ return false;
+
init_perms(dirname);
return true;
}
@@ -427,7 +452,7 @@
}
if (abort) {
- cmd = talloc_asprintf(NULL, "rm -r %s", info->transact_base);
+ cmd = talloc_asprintf(NULL, "rm -rf %s", info->transact_base);
do_command(cmd);
goto success;
}
@@ -984,13 +1009,15 @@
static void setup_file_ops(const char *dir)
{
- char *cmd = talloc_asprintf(NULL, "echo -n r0 > %s/.perms", dir);
+ struct xs_permissions perm = { .id = 0, .perms = XS_PERM_READ };
+ struct file_ops_info *h = file_handle(dir);
if (mkdir(dir, 0700) != 0)
barf_perror("Creating directory %s", dir);
- if (mkdir(talloc_asprintf(cmd, "%s/tool", dir), 0700) != 0)
+ if (mkdir(talloc_asprintf(h, "%s/tool", dir), 0700) != 0)
barf_perror("Creating directory %s/tool", dir);
- do_command(cmd);
- talloc_free(cmd);
+ if (!file_set_perms(h, talloc_strdup(h, "/"), &perm, 1))
+ barf_perror("Setting root perms in %s", dir);
+ file_close(h);
}
static void setup_xs_ops(void)
@@ -1009,8 +1036,8 @@
} else {
dup2(fds[1], STDOUT_FILENO);
close(fds[0]);
-#if 0
- execlp("valgrind", "valgrind", "xenstored_test", "--output-pid",
+#if 1
+ execlp("valgrind", "valgrind", "-q",
"--suppressions=testsuite/vg-suppressions", "xenstored_test", "--output-pid",
"--no-fork", NULL);
#else
execlp("./xenstored_test", "xenstored_test", "--output-pid",
@@ -1112,9 +1139,6 @@
data->ops->close(pre);
}
}
- if (data->print_progress)
- printf("\n");
-
out:
data->ops->close(h);
return i;
@@ -1192,10 +1216,9 @@
try = try_simple(NULL, iters, verbose, &data);
if (try == iters) {
cleanup_xs_ops();
- printf("Succeeded\n");
exit(0);
}
- printf("Failed on iteration %u\n", try + 1);
+ printf("Failed on iteration %u of seed %u\n", try + 1, seed);
data.print_progress = false;
reduce_problem(try + 1, try_simple, &data);
}
@@ -1406,8 +1429,6 @@
talloc_free(fileh_pre);
}
}
- if (data->print_progress)
- printf("\n");
fail = NULL;
if (data->fast)
@@ -1435,10 +1456,9 @@
try = try_diff(NULL, iters, verbose, &data);
if (try == iters) {
cleanup_xs_ops();
- printf("Succeeded\n");
exit(0);
}
- printf("Failed on iteration %u\n", try + 1);
+ printf("Failed on iteration %u of seed %u\n", try + 1, seed);
data.print_progress = false;
reduce_problem(try + 1, try_diff, &data);
}
@@ -1593,8 +1613,6 @@
xs_close(tmpxsh);
file_close(tmpfileh);
}
-
- printf("Total %u of %u not aborted\n", tried - aborted, tried);
out:
if (xsh)
xs_close(xsh);
@@ -1615,10 +1633,9 @@
try = try_fail(NULL, iters, verbose, &data);
if (try == iters) {
cleanup_xs_ops();
- printf("Succeeded\n");
exit(0);
}
- printf("Failed on iteration %u\n", try + 1);
+ printf("Failed on iteration %u of seed %u\n", try + 1, seed);
fflush(stdout);
data.print_progress = false;
reduce_problem(try + 1, try_fail, &data);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_test.c
--- a/tools/xenstore/xs_test.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_test.c Thu Aug 25 22:53:20 2005
@@ -17,6 +17,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
@@ -28,16 +29,25 @@
#include <stdbool.h>
#include <stdlib.h>
#include <sys/mman.h>
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <string.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/time.h>
#include "utils.h"
#include "xs_lib.h"
+#include "list.h"
#define XSTEST
static struct xs_handle *handles[10] = { NULL };
-static unsigned int children;
-
-static bool timeout = true;
+
+static unsigned int timeout_ms = 200;
+static bool timeout_suppressed = true;
static bool readonly = false;
+static bool print_input = false;
+static unsigned int linenum = 0;
struct ringbuf_head
{
@@ -178,7 +188,7 @@
static void __attribute__((noreturn)) usage(void)
{
barf("Usage:\n"
- " xs_test [--readonly] [--notimeout]\n"
+ " xs_test [--readonly] [--no-timeout] [-x]\n"
"Reads commands from stdin, one per line:"
" dir <path>\n"
" read <path>\n"
@@ -190,8 +200,6 @@
" setperm <path> <id> <flags> ...\n"
" shutdown\n"
" watch <path> <token>\n"
- " async <command>...\n"
- " asyncwait\n"
" waitwatch\n"
" ackwatch <token>\n"
" unwatch <path> <token>\n"
@@ -200,7 +208,13 @@
" abort\n"
" introduce <domid> <mfn> <eventchn> <path>\n"
" commit\n"
- " sleep <seconds>\n"
+ " sleep <milliseconds>\n"
+ " expect <pattern>\n"
+ " notimeout\n"
+ " readonly\n"
+ " readwrite\n"
+ " noackwrite <path> <flags> <value>...\n"
+ " readack\n"
" dump\n");
}
@@ -218,7 +232,7 @@
return off;
}
-static char *arg(char *line, unsigned int num)
+static char *arg(const char *line, unsigned int num)
{
static char *args[10];
unsigned int off, len;
@@ -236,12 +250,64 @@
return args[num];
}
+struct expect /* one pending "expect <pattern>" entry */
+{
+ struct list_head list; /* linkage on the expects list declared below */
+ char *pattern; /* strdup'd pattern, fnmatch()ed against each trimmed output line */
+};
+static LIST_HEAD(expects); /* expectations not yet consumed by a matching output */
+
static char *command;
-static void __attribute__((noreturn)) failed(int handle)
+
+/*
+ * Trim leading and trailing whitespace from str, in place.
+ *
+ * str must be a writable, nul-terminated string.  The result is never
+ * longer than the input, so no reallocation is needed.
+ */
+static void trim(char *str)
+{
+	size_t start = 0, len;
+
+	/* isspace() is only defined for unsigned char values (or EOF). */
+	while (isspace((unsigned char)str[start]))
+		start++;
+
+	len = strlen(str + start);
+	while (len && isspace((unsigned char)str[start + len - 1]))
+		len--;
+
+	/* Shift the kept text to the front once, not once per character. */
+	memmove(str, str + start, len);
+	str[len] = '\0';
+}
+
+/*
+ * printf-style replacement used for all test output.
+ *
+ * The formatted text is written to stdout and flushed, then trimmed and
+ * compared against every pending expectation with fnmatch().  The first
+ * expectation that matches is removed from the list and released.  If
+ * nothing matches, the run is aborted through barf().
+ */
+static void output(const char *fmt, ...)
+{
+	char *str;
+	struct expect *i;
+	va_list arglist;
+	int ret;
+
+	va_start(arglist, fmt);
+	ret = vasprintf(&str, fmt, arglist);
+	va_end(arglist);
+	/* On failure the contents of str are undefined: do not touch it. */
+	if (ret < 0)
+		barf_perror("Formatting output");
+
+	printf("%s", str);
+	fflush(stdout);
+	trim(str);
+	list_for_each_entry(i, &expects, list) {
+		if (fnmatch(i->pattern, str, 0) == 0) {
+			/* Unlinking mid-iteration is fine: we return at once. */
+			list_del(&i->list);
+			free(i->pattern);
+			free(i);
+			free(str);
+			return;
+		}
+	}
+	barf("Unexpected output %s\n", str);
+}
+
+static void failed(int handle)
{
if (handle)
- barf_perror("%i: %s", handle, command);
- barf_perror("%s", command);
+ output("%i: %s failed: %s\n",
+ handle, command, strerror(errno));
+ else
+ output("%s failed: %s\n", command, strerror(errno));
+}
+
+/*
+ * Handle the "expect <pattern>" command.
+ *
+ * Everything after the command word on line is copied, trimmed and
+ * queued on the expects list, where output() will look for it.
+ * Allocation failure aborts the run through barf_perror().
+ */
+static void expect(const char *line)
+{
+	struct expect *e = malloc(sizeof(*e));
+
+	if (!e)
+		barf_perror("Allocating expectation");
+
+	e->pattern = strdup(line + argpos(line, 1));
+	if (!e->pattern)
+		barf_perror("Allocating expectation pattern");
+
+	trim(e->pattern);
+	list_add(&e->list, &expects);
}
static void do_dir(unsigned int handle, char *path)
@@ -250,14 +316,16 @@
unsigned int i, num;
entries = xs_directory(handles[handle], path, &num);
- if (!entries)
- failed(handle);
+ if (!entries) {
+ failed(handle);
+ return;
+ }
for (i = 0; i < num; i++)
if (handle)
- printf("%i:%s\n", handle, entries[i]);
+ output("%i:%s\n", handle, entries[i]);
else
- printf("%s\n", entries[i]);
+ output("%s\n", entries[i]);
free(entries);
}
@@ -267,15 +335,17 @@
unsigned int len;
value = xs_read(handles[handle], path, &len);
- if (!value)
- failed(handle);
+ if (!value) {
+ failed(handle);
+ return;
+ }
/* It's supposed to nul terminate for us. */
assert(value[len] == '\0');
if (handle)
- printf("%i:%.*s\n", handle, len, value);
+ output("%i:%.*s\n", handle, len, value);
else
- printf("%.*s\n", len, value);
+ output("%.*s\n", len, value);
}
static void do_write(unsigned int handle, char *path, char *flags, char *data)
@@ -297,6 +367,45 @@
failed(handle);
}
+/*
+ * Send a raw XS_WRITE request on handle without reading the reply.
+ *
+ * flags is the user-supplied word "none", "create" or "excl"; anything
+ * else aborts through barf().  The message is sent as header, then
+ * path (with nul), flags (with nul), then data (no nul).  A later
+ * "readack" command is expected to collect the reply.
+ */
+static void do_noackwrite(unsigned int handle,
+			  char *path, const char *flags, char *data)
+{
+	struct xsd_sockmsg msg;
+	int fd = handles[handle]->fd;
+
+	/* Format: Flags (as string), path, data. */
+	if (streq(flags, "none"))
+		flags = XS_WRITE_NONE;
+	else if (streq(flags, "create"))
+		flags = XS_WRITE_CREATE;
+	else if (streq(flags, "excl"))
+		flags = XS_WRITE_CREATE_EXCL;
+	else
+		barf("noackwrite flags 'none', 'create' or 'excl' only");
+
+	msg.len = strlen(path) + 1 + strlen(flags) + 1 + strlen(data);
+	msg.type = XS_WRITE;
+
+	/*
+	 * failed() only reports the error and returns, so stop at the
+	 * first failing write: sending the remaining pieces would put a
+	 * truncated message on the wire.
+	 */
+	if (!write_all_choice(fd, &msg, sizeof(msg)) ||
+	    !write_all_choice(fd, path, strlen(path) + 1) ||
+	    !write_all_choice(fd, flags, strlen(flags) + 1) ||
+	    !write_all_choice(fd, data, strlen(data))) {
+		failed(handle);
+		return;
+	}
+	/* Do not wait for ack. */
+}
+
+/*
+ * Read one reply from handle and discard it; report through failed()
+ * if no reply could be read.
+ */
+static void do_readack(unsigned int handle)
+{
+	enum xsd_sockmsg_type reply_type;
+	char *reply = read_reply(handles[handle]->fd, &reply_type, NULL);
+
+	if (reply) {
+		free(reply);
+		return;
+	}
+	failed(handle);
+}
+
static void do_setid(unsigned int handle, char *id)
{
if (!xs_bool(xs_debug_command(handles[handle], "setid", id,
@@ -322,8 +431,10 @@
struct xs_permissions *perms;
perms = xs_get_permissions(handles[handle], path, &num);
- if (!perms)
- failed(handle);
+ if (!perms) {
+ failed(handle);
+ return;
+ }
for (i = 0; i < num; i++) {
char *permstring;
@@ -346,9 +457,9 @@
}
if (handle)
- printf("%i:%i %s\n", handle, perms[i].id, permstring);
+ output("%i:%i %s\n", handle, perms[i].id, permstring);
else
- printf("%i %s\n", perms[i].id, permstring);
+ output("%i %s\n", perms[i].id, permstring);
}
free(perms);
}
@@ -396,18 +507,56 @@
failed(handle);
}
+/*
+ * Arm a one-shot ITIMER_REAL timer for timeout_ms milliseconds.
+ *
+ * When timeout_ms is 0 both it_value fields are zero; per setitimer(2)
+ * that disarms the timer rather than arming it.
+ */
+static void set_timeout(void)
+{
+	struct itimerval timeout;
+
+	timeout.it_value.tv_sec = timeout_ms / 1000;
+	/* Take the remainder first so the multiplication cannot wrap. */
+	timeout.it_value.tv_usec = (timeout_ms % 1000) * 1000;
+	timeout.it_interval.tv_sec = 0;
+	timeout.it_interval.tv_usec = 0;
+	setitimer(ITIMER_REAL, &timeout, NULL);
+}
+
+/*
+ * Cancel any pending ITIMER_REAL timer.
+ *
+ * Every field is zeroed: setitimer() validates it_interval as well as
+ * it_value, so uninitialised stack contents there can make the call
+ * fail and leave the timer armed.
+ */
+static void disarm_timeout(void)
+{
+	struct itimerval timeout;
+
+	timeout.it_value.tv_sec = 0;
+	timeout.it_value.tv_usec = 0;
+	timeout.it_interval.tv_sec = 0;
+	timeout.it_interval.tv_usec = 0;
+	setitimer(ITIMER_REAL, &timeout, NULL);
+}
+
static void do_waitwatch(unsigned int handle)
{
char **vec;
+ struct timeval tv = {.tv_sec = timeout_ms/1000,
+ .tv_usec = (timeout_ms*1000)%1000000 };
+ fd_set set;
+
+ if (xs_fileno(handles[handle]) != -2) {
+ /* Manually select here so we can time out gracefully. */
+ FD_ZERO(&set);
+ FD_SET(xs_fileno(handles[handle]), &set);
+ disarm_timeout();
+ if (select(xs_fileno(handles[handle])+1, &set,
+ NULL, NULL, &tv) == 0) {
+ errno = ETIMEDOUT;
+ failed(handle);
+ return;
+ }
+ set_timeout();
+ }
vec = xs_read_watch(handles[handle]);
- if (!vec)
- failed(handle);
+ if (!vec) {
+ failed(handle);
+ return;
+ }
if (handle)
- printf("%i:%s:%s\n", handle, vec[0], vec[1]);
+ output("%i:%s:%s\n", handle, vec[0], vec[1]);
else
- printf("%s:%s\n", vec[0], vec[1]);
+ output("%s:%s\n", vec[0], vec[1]);
free(vec);
}
@@ -415,82 +564,6 @@
{
if (!xs_acknowledge_watch(handles[handle], token))
failed(handle);
-}
-
-static bool wait_for_input(unsigned int handle)
-{
- unsigned int i;
- for (i = 0; i < ARRAY_SIZE(handles); i++) {
- int fd;
-
- if (!handles[i] || i == handle)
- continue;
-
- fd = xs_fileno(handles[i]);
- if (fd == -2) {
- unsigned int avail;
- get_input_chunk(in, in->buf, &avail);
- if (avail != 0)
- return true;
- } else {
- struct timeval tv = {.tv_sec = 0, .tv_usec = 0 };
- fd_set set;
-
- FD_ZERO(&set);
- FD_SET(fd, &set);
- if (select(fd+1, &set, NULL, NULL,&tv))
- return true;
- }
- }
- return false;
-}
-
-
-/* Async wait for watch on handle */
-static void do_command(unsigned int default_handle, char *line);
-static void do_async(unsigned int handle, char *line)
-{
- int child;
- unsigned int i;
- children++;
- if ((child = fork()) != 0) {
- /* Wait until *something* happens, which indicates
- * child has created an event. V. sloppy, but we can't
- * select on fake domain connections.
- */
- while (!wait_for_input(handle));
- return;
- }
-
- /* Don't keep other handles open in parent. */
- for (i = 0; i < ARRAY_SIZE(handles); i++) {
- if (handles[i] && i != handle) {
- xs_daemon_close(handles[i]);
- handles[i] = NULL;
- }
- }
-
- do_command(handle, line + argpos(line, 1));
- exit(0);
-}
-
-static void do_asyncwait(unsigned int handle)
-{
- int status;
-
- if (handle)
- barf("handle has no meaning with asyncwait");
-
- if (children == 0)
- barf("No children to wait for!");
-
- if (waitpid(0, &status, 0) > 0) {
- if (!WIFEXITED(status))
- barf("async died");
- if (WEXITSTATUS(status))
- exit(WEXITSTATUS(status));
- }
- children--;
}
static void do_unwatch(unsigned int handle, const char *node, const char
*token)
@@ -519,6 +592,9 @@
{
unsigned int i;
int fd;
+
+ /* This mechanism is v. slow w. valgrind running. */
+ timeout_ms = 5000;
/* We poll, so ignore signal */
signal(SIGUSR2, SIG_IGN);
@@ -538,14 +614,17 @@
*(int *)((void *)out + 32) = getpid();
*(u16 *)((void *)out + 36) = atoi(eventchn);
+ if (!xs_introduce_domain(handles[handle], atoi(domid),
+ atol(mfn), atoi(eventchn), path)) {
+ failed(handle);
+ munmap(out, getpagesize());
+ return;
+ }
+ output("handle is %i\n", i);
+
/* Create new handle. */
handles[i] = new(struct xs_handle);
handles[i]->fd = -2;
-
- if (!xs_introduce_domain(handles[handle], atoi(domid),
- atol(mfn), atoi(eventchn), path))
- failed(handle);
- printf("handle is %i\n", i);
/* Read in daemon pid. */
daemon_pid = *(int *)((void *)out + 32);
@@ -593,18 +672,20 @@
sprintf(subnode, "%s/%s", node, dir[i]);
perms = xs_get_permissions(handles[handle], subnode,&numperms);
- if (!perms)
+ if (!perms) {
failed(handle);
-
- printf("%s%s: ", spacing, dir[i]);
+ return;
+ }
+
+ output("%s%s: ", spacing, dir[i]);
for (j = 0; j < numperms; j++) {
char buffer[100];
if (!xs_perm_to_string(&perms[j], buffer))
barf("perm to string");
- printf("%s ", buffer);
+ output("%s ", buffer);
}
free(perms);
- printf("\n");
+ output("\n");
/* Even directories can have contents. */
contents = xs_read(handles[handle], subnode, &len);
@@ -612,14 +693,16 @@
if (errno != EISDIR)
failed(handle);
} else {
- printf(" %s(%.*s)\n", spacing, len, contents);
+ output(" %s(%.*s)\n", spacing, len, contents);
free(contents);
}
/* Every node is a directory. */
subdirs = xs_directory(handles[handle], subnode, &subnum);
- if (!subdirs)
+ if (!subdirs) {
failed(handle);
+ return;
+ }
dump_dir(handle, subnode, subdirs, subnum, depth+1);
free(subdirs);
}
@@ -631,8 +714,10 @@
unsigned int subnum;
subdirs = xs_directory(handles[handle], "/", &subnum);
- if (!subdirs)
- failed(handle);
+ if (!subdirs) {
+ failed(handle);
+ return;
+ }
dump_dir(handle, "", subdirs, subnum, 0);
free(subdirs);
@@ -655,6 +740,9 @@
static void do_command(unsigned int default_handle, char *line)
{
char *endp;
+
+ if (print_input)
+ printf("%i> %s", ++linenum, line);
if (strspn(line, " \n") == strlen(line))
return;
@@ -667,6 +755,7 @@
else
handle = default_handle;
+ command = arg(line, 0);
if (!handles[handle]) {
if (readonly)
handles[handle] = xs_daemon_open_readonly();
@@ -675,10 +764,10 @@
if (!handles[handle])
barf_perror("Opening connection to daemon");
}
- command = arg(line, 0);
-
- if (timeout)
- alarm(1);
+
+ if (!timeout_suppressed)
+ set_timeout();
+ timeout_suppressed = false;
if (streq(command, "dir"))
do_dir(handle, arg(line, 1));
@@ -703,10 +792,6 @@
do_watch(handle, arg(line, 1), arg(line, 2));
else if (streq(command, "waitwatch"))
do_waitwatch(handle);
- else if (streq(command, "async"))
- do_async(handle, line);
- else if (streq(command, "asyncwait"))
- do_asyncwait(handle);
else if (streq(command, "ackwatch"))
do_ackwatch(handle, arg(line, 1));
else if (streq(command, "unwatch"))
@@ -727,32 +812,70 @@
do_release(handle, arg(line, 1));
else if (streq(command, "dump"))
dump(handle);
- else if (streq(command, "sleep"))
- sleep(atoi(arg(line, 1)));
+ else if (streq(command, "sleep")) {
+ disarm_timeout();
+ usleep(atoi(arg(line, 1)) * 1000);
+ } else if (streq(command, "expect"))
+ expect(line);
+ else if (streq(command, "notimeout"))
+ timeout_suppressed = true;
+ else if (streq(command, "readonly")) {
+ readonly = true;
+ xs_daemon_close(handles[handle]);
+ handles[handle] = NULL;
+ } else if (streq(command, "readwrite")) {
+ readonly = false;
+ xs_daemon_close(handles[handle]);
+ handles[handle] = NULL;
+ } else if (streq(command, "noackwrite"))
+ do_noackwrite(handle, arg(line,1), arg(line,2), arg(line,3));
+ else if (streq(command, "readack"))
+ do_readack(handle);
else
barf("Unknown command %s", command);
fflush(stdout);
- alarm(0);
-}
+ disarm_timeout();
+
+ /* Check expectations. */
+ if (!streq(command, "expect")) {
+ struct expect *i = list_top(&expects, struct expect, list);
+
+ if (i)
+ barf("Expected '%s', didn't happen\n", i->pattern);
+ }
+}
+
+static struct option options[] = { { "readonly", 0, NULL, 'r' },
+ { "no-timeout", 0, NULL, 't' },
+ { NULL, 0, NULL, 0 } };
int main(int argc, char *argv[])
{
+ int opt;
char line[1024];
- if (argc > 1 && streq(argv[1], "--readonly")) {
- readonly = true;
- argc--;
- argv++;
- }
-
- if (argc > 1 && streq(argv[1], "--no-timeout")) {
- timeout = false;
- argc--;
- argv++;
- }
-
- if (argc != 1)
+ while ((opt = getopt_long(argc, argv, "xrt", options, NULL)) != -1) {
+ switch (opt) {
+ case 'r':
+ readonly = true;
+ break;
+ case 't':
+ timeout_ms = 0;
+ break;
+ case 'x':
+ print_input = true;
+ break;
+ }
+ }
+
+	/* An optional single non-option argument names a command file to
+	 * read in place of stdin. */
+	if (optind + 1 == argc) {
+		int fd = open(argv[optind], O_RDONLY);
+		/* open() reports failure as -1; 0 is a valid descriptor. */
+		if (fd < 0)
+			barf_perror("Opening %s", argv[optind]);
+		if (dup2(fd, STDIN_FILENO) < 0)
+			barf_perror("Redirecting stdin from %s", argv[optind]);
+	} else if (optind != argc)
usage();
+
/* The size of the ringbuffer: half a page minus head structure. */
ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
@@ -761,7 +884,5 @@
while (fgets(line, sizeof(line), stdin))
do_command(0, line);
- while (children)
- do_asyncwait(0);
return 0;
}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xentrace/Makefile
--- a/tools/xentrace/Makefile Wed Aug 24 02:43:18 2005
+++ b/tools/xentrace/Makefile Thu Aug 25 22:53:20 2005
@@ -36,4 +36,4 @@
$(RM) *.a *.so *.o *.rpm $(BIN)
%: %.c $(HDRS) Makefile
- $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc
+ $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xentrace/xenctx.c
--- a/tools/xentrace/xenctx.c Wed Aug 24 02:43:18 2005
+++ b/tools/xentrace/xenctx.c Thu Aug 25 22:53:20 2005
@@ -21,7 +21,7 @@
#include <argp.h>
#include <signal.h>
-#include "xc.h"
+#include "xenctrl.h"
#ifdef __i386__
void print_ctx(vcpu_guest_context_t *ctx1)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Wed Aug 24 02:43:18 2005
+++ b/tools/xentrace/xentrace.c Thu Aug 25 22:53:20 2005
@@ -45,6 +45,8 @@
char *outfile;
struct timespec poll_sleep;
unsigned long new_data_thresh;
+ u32 evt_mask;
+ u32 cpu_mask;
} settings_t;
settings_t opts;
@@ -93,13 +95,13 @@
/**
* get_tbufs - get pointer to and size of the trace buffers
- * @mach_addr: location to store machine address if the trace buffers to
- * @size: location to store the size of a trace buffer to
+ * @mfn: location to store mfn of the trace buffers to
+ * @size: location to store the size of a trace buffer to
*
* Gets the machine address of the trace pointer area and the size of the
* per CPU buffers.
*/
-void get_tbufs(unsigned long *mach_addr, unsigned long *size)
+void get_tbufs(unsigned long *mfn, unsigned long *size)
{
int ret;
dom0_op_t op; /* dom0 op we'll build */
@@ -119,19 +121,19 @@
exit(EXIT_FAILURE);
}
- *mach_addr = op.u.tbufcontrol.mach_addr;
- *size = op.u.tbufcontrol.size;
+ *mfn = op.u.tbufcontrol.buffer_mfn;
+ *size = op.u.tbufcontrol.size;
}
/**
* map_tbufs - memory map Xen trace buffers into user space
- * @tbufs: machine address of the trace buffers
+ * @tbufs_mfn: mfn of the trace buffers
* @num: number of trace buffers to map
* @size: size of each trace buffer
*
* Maps the Xen trace buffers them into process address space.
*/
-struct t_buf *map_tbufs(unsigned long tbufs_mach, unsigned int num,
+struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
unsigned long size)
{
int xc_handle; /* file descriptor for /proc/xen/privcmd */
@@ -147,7 +149,7 @@
tbufs_mapped = xc_map_foreign_range(xc_handle, 0 /* Dom 0 ID */,
size * num, PROT_READ,
- tbufs_mach >> PAGE_SHIFT);
+ tbufs_mfn);
xc_interface_close(xc_handle);
@@ -160,6 +162,41 @@
return tbufs_mapped;
}
+/**
+ * set_mask - set the cpu/event mask in HV
+ * @mask: the new mask
+ * @type: the new mask type,0-event mask, 1-cpu mask
+ *
+ * Builds a DOM0_TBUFCONTROL operation selecting either
+ * DOM0_TBUF_SET_CPU_MASK or DOM0_TBUF_SET_EVT_MASK and issues it through
+ * the control interface.  Exits the program if @type is not 0 or 1, if
+ * the control interface cannot be opened, or if the operation fails.
+ */
+void set_mask(u32 mask, int type)
+{
+    int ret;
+    dom0_op_t op;                        /* dom0 op we'll build */
+    int xc_handle;                       /* for accessing control interface */
+
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    if ( type == 1 )
+    {   /* cpu mask */
+        op.u.tbufcontrol.op = DOM0_TBUF_SET_CPU_MASK;
+        op.u.tbufcontrol.cpu_mask = mask;
+        fprintf(stderr, "change cpumask to 0x%x\n", mask);
+    }
+    else if ( type == 0 )
+    {   /* event mask */
+        op.u.tbufcontrol.op = DOM0_TBUF_SET_EVT_MASK;
+        op.u.tbufcontrol.evt_mask = mask;
+        fprintf(stderr, "change evtmask to 0x%x\n", mask);
+    }
+    else
+    {
+        /* Never send an operation whose sub-op was left uninitialised. */
+        fprintf(stderr, "set_mask: unknown mask type %d\n", type);
+        exit(EXIT_FAILURE);
+    }
+
+    /* NOTE(review): assumes a negative handle means the open failed, as
+     * for a plain file descriptor - confirm against libxenctrl. */
+    xc_handle = xc_interface_open();
+    if ( xc_handle < 0 )
+    {
+        PERROR("Open /proc/xen/privcmd when setting trace mask");
+        exit(EXIT_FAILURE);
+    }
+
+    ret = do_dom0_op(xc_handle, &op);
+
+    xc_interface_close(xc_handle);
+
+    if ( ret != 0 )
+    {
+        PERROR("Failure to get trace buffer pointer from Xen and set the new mask");
+        exit(EXIT_FAILURE);
+    }
+}
/**
* init_bufs_ptrs - initialises an array of pointers to the trace buffers
@@ -194,7 +231,7 @@
/**
* init_rec_ptrs - initialises data area pointers to locations in user space
- * @tbufs_mach: machine base address of the trace buffer area
+ * @tbufs_mfn: base mfn of the trace buffer area
* @tbufs_mapped: user virtual address of base of trace buffer area
* @meta: array of user-space pointers to struct t_buf's of metadata
* @num: number of trace buffers
@@ -203,7 +240,7 @@
* mapped in user space. Note that the trace buffer metadata contains machine
* pointers - the array returned allows more convenient access to them.
*/
-struct t_rec **init_rec_ptrs(unsigned long tbufs_mach,
+struct t_rec **init_rec_ptrs(unsigned long tbufs_mfn,
struct t_buf *tbufs_mapped,
struct t_buf **meta,
unsigned int num)
@@ -219,7 +256,7 @@
}
for ( i = 0; i < num; i++ )
- data[i] = (struct t_rec *)(meta[i]->rec_addr - tbufs_mach
+ data[i] = (struct t_rec *)(meta[i]->rec_addr -
(tbufs_mfn<<XC_PAGE_SHIFT) /* XXX */
+ (unsigned long)tbufs_mapped);
return data;
@@ -293,7 +330,7 @@
struct t_rec **data; /* pointers to the trace buffer data areas
* where they are mapped into user space. */
unsigned long *cons; /* store tail indexes for the trace buffers */
- unsigned long tbufs_mach; /* machine address of the tbufs */
+ unsigned long tbufs_mfn; /* mfn of the tbufs */
unsigned int num; /* number of trace buffers / logical CPUS */
unsigned long size; /* size of a single trace buffer */
@@ -303,14 +340,14 @@
num = get_num_cpus();
/* setup access to trace buffers */
- get_tbufs(&tbufs_mach, &size);
- tbufs_mapped = map_tbufs(tbufs_mach, num, size);
+ get_tbufs(&tbufs_mfn, &size);
+ tbufs_mapped = map_tbufs(tbufs_mfn, num, size);
size_in_recs = (size - sizeof(struct t_buf)) / sizeof(struct t_rec);
/* build arrays of convenience ptrs */
meta = init_bufs_ptrs (tbufs_mapped, num, size);
- data = init_rec_ptrs (tbufs_mach, tbufs_mapped, meta, num);
+ data = init_rec_ptrs (tbufs_mfn, tbufs_mapped, meta, num);
cons = init_tail_idxs (meta, num);
/* now, scan buffers for events */
@@ -341,6 +378,31 @@
* Various declarations / definitions GNU argp needs to do its work
*****************************************************************************/
+/*
+ * parse_evtmask - handle the argument of the evt-mask option
+ * @arg:   option argument: one of the class names "gen", "sched",
+ *         "dom0op", "vmx", "all", or a number in any base strtoul()
+ *         accepts with base 0
+ * @state: argp parser state; state->input points at the settings_t
+ *
+ * A class name ORs the corresponding TRC_* bits into evt_mask.  A number
+ * replaces evt_mask outright.  Anything else is reported with
+ * argp_usage().  Always returns 0.
+ */
+int parse_evtmask(char *arg, struct argp_state *state)
+{
+    settings_t *setup = (settings_t *)state->input;
+    char *inval;
+
+    /* search filtering class */
+    if ( strcmp(arg, "gen") == 0 )
+        setup->evt_mask |= TRC_GEN;
+    else if ( strcmp(arg, "sched") == 0 )
+        setup->evt_mask |= TRC_SCHED;
+    else if ( strcmp(arg, "dom0op") == 0 )
+        setup->evt_mask |= TRC_DOM0OP;
+    else if ( strcmp(arg, "vmx") == 0 )
+        setup->evt_mask |= TRC_VMX;
+    else if ( strcmp(arg, "all") == 0 )
+        setup->evt_mask |= TRC_ALL;
+    else
+    {
+        /*
+         * Unsigned conversion: with a 32-bit long, strtol() would clamp
+         * masks with the top bit set to LONG_MAX.
+         */
+        unsigned long val = strtoul(arg, &inval, 0);
+
+        /* Reject empty numbers and trailing junk such as "0x10foo". */
+        if ( inval == arg || *inval != '\0' )
+            argp_usage(state);
+        setup->evt_mask = val;
+    }
+
+    return 0;
+}
/* command parser for GNU argp - see GNU docs for more info */
error_t cmd_parser(int key, char *arg, struct argp_state *state)
@@ -366,6 +428,21 @@
argp_usage(state);
}
break;
+
+ case 'c': /* set new cpu mask for filtering*/
+ {
+ char *inval;
+ setup->cpu_mask = strtol(arg, &inval, 0);
+ if ( inval == arg )
+ argp_usage(state);
+ }
+ break;
+
+ case 'e': /* set new event mask for filtering*/
+ {
+ parse_evtmask(arg, state);
+ }
+ break;
case ARGP_KEY_ARG:
{
@@ -397,6 +474,14 @@
.doc =
"Set sleep time, p, in milliseconds between polling the trace buffer "
"for new data (default " xstr(POLL_SLEEP_MILLIS) ")." },
+
+ { .name = "cpu-mask", .key='c', .arg="c",
+ .doc =
+ "set cpu-mask " },
+
+ { .name = "evt-mask", .key='e', .arg="e",
+ .doc =
+ "set evt-mask " },
{0}
};
@@ -430,8 +515,18 @@
opts.outfile = 0;
opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
opts.new_data_thresh = NEW_DATA_THRESH;
+ opts.evt_mask = 0;
+ opts.cpu_mask = 0;
argp_parse(&parser_def, argc, argv, 0, 0, &opts);
+
+    /* A mask of 0 means the option was not given: leave Xen's setting. */
+    if (opts.evt_mask != 0) {
+        set_mask(opts.evt_mask, 0);
+    }
+
+    if (opts.cpu_mask != 0) {
+        /* Type 1 selects the CPU mask, so pass cpu_mask, not evt_mask. */
+        set_mask(opts.cpu_mask, 1);
+    }
if ( opts.outfile )
outfd = open(opts.outfile, O_WRONLY | O_CREAT);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/Rules.mk
--- a/xen/Rules.mk Wed Aug 24 02:43:18 2005
+++ b/xen/Rules.mk Thu Aug 25 22:53:20 2005
@@ -2,7 +2,7 @@
# If you change any of these configuration options then you must
# 'make clean' before rebuilding.
#
-verbose ?= n
+verbose ?= y
debug ?= n
perfc ?= n
perfc_arrays?= n
@@ -10,14 +10,6 @@
optimize ?= y
domu_debug ?= n
crash_debug ?= n
-
-# ACM_USE_SECURITY_POLICY is set to security policy of Xen
-# Supported models are:
-# ACM_NULL_POLICY (ACM will not be built with this policy)
-# ACM_CHINESE_WALL_POLICY
-# ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY
-# ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
-ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY
include $(BASEDIR)/../Config.mk
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/domain.c
--- a/xen/arch/ia64/domain.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/domain.c Thu Aug 25 22:53:20 2005
@@ -1092,3 +1092,12 @@
{
vcpu_pend_interrupt(dom0->vcpu[0],irq);
}
+
+/*
+ * Move vcpu v to processor newcpu.  When the processor actually changes,
+ * _VCPUF_cpu_migrated is set in v->vcpu_flags before the new processor
+ * is recorded; otherwise nothing is touched.
+ */
+void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
+{
+    if ( v->processor != newcpu )
+    {
+        set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+        v->processor = newcpu;
+    }
+}
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/grant_table.c
--- a/xen/arch/ia64/grant_table.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/grant_table.c Thu Aug 25 22:53:20 2005
@@ -355,7 +355,7 @@
/* Bitwise-OR avoids short-circuiting which screws control flow. */
if ( unlikely(__get_user(dom, &uop->dom) |
__get_user(ref, &uop->ref) |
- __get_user(host_virt_addr, &uop->host_virt_addr) |
+ __get_user(host_virt_addr, &uop->host_addr) |
__get_user(dev_hst_ro_flags, &uop->flags)) )
{
DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
@@ -500,7 +500,7 @@
ld = current->domain;
/* Bitwise-OR avoids short-circuiting which screws control flow. */
- if ( unlikely(__get_user(virt, &uop->host_virt_addr) |
+ if ( unlikely(__get_user(virt, &uop->host_addr) |
__get_user(frame, &uop->dev_bus_addr) |
__get_user(handle, &uop->handle)) )
{
@@ -545,15 +545,6 @@
if ( frame == 0 )
{
frame = act->frame;
- }
- else if ( frame == GNTUNMAP_DEV_FROM_VIRT )
- {
- if ( !( flags & GNTMAP_device_map ) )
- PIN_FAIL(unmap_out, GNTST_bad_dev_addr,
- "Bad frame number: frame not mapped for dev access.\n");
- frame = act->frame;
-
- /* Frame will be unmapped for device access below if virt addr okay. */
}
else
{
@@ -615,15 +606,6 @@
act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
: GNTPIN_hstw_inc;
-
- if ( frame == GNTUNMAP_DEV_FROM_VIRT )
- {
- act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
- : GNTPIN_devw_inc;
-
- map->ref_and_flags &= ~GNTMAP_device_map;
- (void)__put_user(0, &uop->dev_bus_addr);
- }
rc = 0;
*va = virt;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/vcpu.c
--- a/xen/arch/ia64/vcpu.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/vcpu.c Thu Aug 25 22:53:20 2005
@@ -585,6 +585,14 @@
set_bit(vector,PSCBX(vcpu,irr));
PSCB(vcpu,pending_interruption) = 1;
}
+
+ /* Keir: I think you should unblock when an interrupt is pending. */
+ {
+ int running = test_bit(_VCPUF_running, &vcpu->vcpu_flags);
+ vcpu_unblock(vcpu);
+ if ( running )
+ smp_send_event_check_cpu(vcpu->processor);
+ }
}
void early_tick(VCPU *vcpu)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/xenmisc.c
--- a/xen/arch/ia64/xenmisc.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/xenmisc.c Thu Aug 25 22:53:20 2005
@@ -280,7 +280,6 @@
unsigned long context_switch_count = 0;
-// context_switch
void context_switch(struct vcpu *prev, struct vcpu *next)
{
//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
@@ -290,22 +289,14 @@
//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
#ifdef CONFIG_VTI
- unsigned long psr;
- /* Interrupt is enabled after next task is chosen.
- * So we have to disable it for stack switch.
- */
- local_irq_save(psr);
vtm_domain_out(prev);
- /* Housekeeping for prev domain */
-#endif // CONFIG_VTI
-
+#endif
context_switch_count++;
switch_to(prev,next,prev);
#ifdef CONFIG_VTI
- /* Post-setup for new domain */
vtm_domain_in(current);
- local_irq_restore(psr);
-#endif // CONFIG_VTI
+#endif
+
// leave this debug for now: it acts as a heartbeat when more than
// one domain is active
{
@@ -315,25 +306,27 @@
if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
if (!i--) { printk("+",id); i = 1000000; }
}
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
- //if (!is_idle_task(next->domain) )
- //send_guest_virq(next, VIRQ_TIMER);
+
#ifdef CONFIG_VTI
if (VMX_DOMAIN(current))
vmx_load_all_rr(current);
- return;
-#else // CONFIG_VTI
+#else
if (!is_idle_task(current->domain)) {
load_region_regs(current);
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
}
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
-#endif // CONFIG_VTI
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+ /* nothing to do */
}
void continue_running(struct vcpu *same)
{
- /* nothing to do */
+ /* nothing to do */
}
void panic_domain(struct pt_regs *regs, const char *fmt, ...)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/xensetup.c
--- a/xen/arch/ia64/xensetup.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/xensetup.c Thu Aug 25 22:53:20 2005
@@ -131,12 +131,14 @@
}
struct ns16550_defaults ns16550_com1 = {
+ .baud = BAUD_AUTO,
.data_bits = 8,
.parity = 'n',
.stop_bits = 1
};
struct ns16550_defaults ns16550_com2 = {
+ .baud = BAUD_AUTO,
.data_bits = 8,
.parity = 'n',
.stop_bits = 1
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/xentime.c
--- a/xen/arch/ia64/xentime.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/xentime.c Thu Aug 25 22:53:20 2005
@@ -48,7 +48,7 @@
static s_time_t stime_irq = 0x0; /* System time at last 'time
update' */
unsigned long itc_scale;
unsigned long itc_at_irq;
-static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
+static unsigned long wc_sec, wc_nsec; /* UTC time at last 'time update'. */
//static rwlock_t time_lock = RW_LOCK_UNLOCKED;
static irqreturn_t vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs
*regs);
@@ -103,25 +103,22 @@
}
/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
-void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
+void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
{
#ifdef CONFIG_VTI
- s64 delta;
- long _usecs = (long)usecs;
+ u64 _nsecs;
write_lock_irq(&xtime_lock);
- delta = (s64)(stime_irq - system_time_base);
-
- _usecs += (long)(delta/1000);
- while ( _usecs >= 1000000 )
+ _nsecs = (u64)nsecs + (s64)(stime_irq - system_time_base);
+ while ( _nsecs >= 1000000000 )
{
- _usecs -= 1000000;
+ _nsecs -= 1000000000;
secs++;
}
wc_sec = secs;
- wc_usec = _usecs;
+ wc_nsec = (unsigned long)_nsecs;
write_unlock_irq(&xtime_lock);
@@ -290,13 +287,13 @@
/* Wallclock time starts as the initial RTC time. */
efi_gettimeofday(&tm);
wc_sec = tm.tv_sec;
- wc_usec = tm.tv_nsec/1000;
+ wc_nsec = tm.tv_nsec;
printk("Time init:\n");
printk(".... System Time: %ldns\n", NOW());
printk(".... scale: %16lX\n", itc_scale);
- printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_usec);
+ printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_nsec/1000);
return 0;
}
@@ -338,10 +335,10 @@
(*(unsigned long *)&jiffies_64)++;
/* Update wall time. */
- wc_usec += 1000000/HZ;
- if ( wc_usec >= 1000000 )
+ wc_nsec += 1000000000/HZ;
+ if ( wc_nsec >= 1000000000 )
{
- wc_usec -= 1000000;
+ wc_nsec -= 1000000000;
wc_sec++;
}
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/audit.c Thu Aug 25 22:53:20 2005
@@ -73,7 +73,7 @@
if ( tcount < 0 )
{
APRINTK("Audit %d: type count went below zero "
- "mfn=%lx t=%x ot=%x",
+ "mfn=%lx t=%" PRtype_info " ot=%x",
d->domain_id, page_to_pfn(page),
page->u.inuse.type_info,
page->tlbflush_timestamp);
@@ -82,7 +82,7 @@
else if ( (tcount & ~PGT_count_mask) != 0 )
{
APRINTK("Audit %d: type count overflowed "
- "mfn=%lx t=%x ot=%x",
+ "mfn=%lx t=%" PRtype_info " ot=%x",
d->domain_id, page_to_pfn(page),
page->u.inuse.type_info,
page->tlbflush_timestamp);
@@ -101,7 +101,7 @@
if ( count < 0 )
{
APRINTK("Audit %d: general count went below zero "
- "mfn=%lx t=%x ot=%x",
+ "mfn=%lx t=%" PRtype_info " ot=%x",
d->domain_id, page_to_pfn(page),
page->u.inuse.type_info,
page->tlbflush_timestamp);
@@ -110,7 +110,7 @@
else if ( (count & ~PGT_count_mask) != 0 )
{
APRINTK("Audit %d: general count overflowed "
- "mfn=%lx t=%x ot=%x",
+ "mfn=%lx t=%" PRtype_info " ot=%x",
d->domain_id, page_to_pfn(page),
page->u.inuse.type_info,
page->tlbflush_timestamp);
@@ -152,7 +152,8 @@
if ( page_type != PGT_l1_shadow )
{
printk("Audit %d: [Shadow L2 mfn=%lx i=%x] "
- "Expected Shadow L1 t=%x mfn=%lx\n",
+ "Expected Shadow L1 t=%" PRtype_info
+ " mfn=%lx\n",
d->domain_id, mfn, i,
l1page->u.inuse.type_info, l1mfn);
errors++;
@@ -178,14 +179,14 @@
if ( page_type == PGT_l2_page_table )
{
printk("Audit %d: [%x] Found %s Linear PT "
- "t=%x mfn=%lx\n",
+ "t=%" PRtype_info " mfn=%lx\n",
d->domain_id, i, (l1mfn==mfn) ? "Self" :
"Other",
l1page->u.inuse.type_info, l1mfn);
}
else if ( page_type != PGT_l1_page_table )
{
printk("Audit %d: [L2 mfn=%lx i=%x] "
- "Expected L1 t=%x mfn=%lx\n",
+ "Expected L1 t=%" PRtype_info " mfn=%lx\n",
d->domain_id, mfn, i,
l1page->u.inuse.type_info, l1mfn);
errors++;
@@ -237,7 +238,8 @@
if ( page_get_owner(gpage) != d )
{
printk("Audit %d: [hl2mfn=%lx,i=%x] Skip foreign page "
- "dom=%p (id=%d) mfn=%lx c=%08x t=%08x\n",
+ "dom=%p (id=%d) mfn=%lx c=%08x t=%"
+ PRtype_info "\n",
d->domain_id, hl2mfn, i,
page_get_owner(gpage),
page_get_owner(gpage)->domain_id,
@@ -288,7 +290,7 @@
PGT_writable_page) )
{
printk("Audit %d: [l1mfn=%lx, i=%x] Illegal RW "
- "t=%x mfn=%lx\n",
+ "t=%" PRtype_info " mfn=%lx\n",
d->domain_id, l1mfn, i,
gpage->u.inuse.type_info, gmfn);
errors++;
@@ -308,7 +310,8 @@
if ( page_get_owner(gpage) != d )
{
printk("Audit %d: [l1mfn=%lx,i=%x] Skip foreign page "
- "dom=%p (id=%d) mfn=%lx c=%08x t=%08x\n",
+ "dom=%p (id=%d) mfn=%lx c=%08x t=%"
+ PRtype_info "\n",
d->domain_id, l1mfn, i,
page_get_owner(gpage),
page_get_owner(gpage)->domain_id,
@@ -454,7 +457,7 @@
if ( shadow_refcounts )
{
printk("Audit %d: found an L2 guest page "
- "mfn=%lx t=%08x c=%08x while in shadow mode\n",
+ "mfn=%lx t=%" PRtype_info " c=%08x while in
shadow mode\n",
d->domain_id, mfn, page->u.inuse.type_info,
page->count_info);
errors++;
@@ -465,14 +468,16 @@
if ( (page->u.inuse.type_info & PGT_validated) !=
PGT_validated )
{
- printk("Audit %d: L2 mfn=%lx not validated %08x\n",
+ printk("Audit %d: L2 mfn=%lx not validated %"
+ PRtype_info "\n",
d->domain_id, mfn, page->u.inuse.type_info);
errors++;
}
if ( (page->u.inuse.type_info & PGT_pinned) !=
PGT_pinned )
{
- printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
+ printk("Audit %d: L2 mfn=%lx not pinned t=%"
+ PRtype_info "\n",
d->domain_id, mfn, page->u.inuse.type_info);
errors++;
}
@@ -494,7 +499,8 @@
{
if ( shadow_refcounts )
{
- printk("found an L1 guest page mfn=%lx t=%08x c=%08x "
+ printk("found an L1 guest page mfn=%lx t=%"
+ PRtype_info " c=%08x "
"while in shadow mode\n",
mfn, page->u.inuse.type_info, page->count_info);
errors++;
@@ -505,7 +511,8 @@
if ( (page->u.inuse.type_info & PGT_validated) !=
PGT_validated )
{
- printk("Audit %d: L1 not validated mfn=%lx
t=%08x\n",
+ printk("Audit %d: L1 not validated mfn=%lx t=%"
+ PRtype_info "\n",
d->domain_id, mfn, page->u.inuse.type_info);
errors++;
}
@@ -514,7 +521,8 @@
{
if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
{
- printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
+ printk("Audit %d: L1 mfn=%lx not pinned t=%"
+ PRtype_info "\n",
d->domain_id, mfn, page->u.inuse.type_info);
}
}
@@ -621,7 +629,7 @@
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
if ( (pt[i] & _PAGE_PRESENT) && ((pt[i] >> PAGE_SHIFT) == xmfn) )
- printk(" found dom=%d mfn=%lx t=%08x c=%08x "
+ printk(" found dom=%d mfn=%lx t=%" PRtype_info " c=%08x "
"pt[i=%x]=%lx\n",
d->domain_id, mfn, page->u.inuse.type_info,
page->count_info, i, pt[i]);
@@ -754,7 +762,7 @@
if ( (page->u.inuse.type_info & PGT_count_mask) >
(page->count_info & PGC_count_mask) )
{
- printk("taf(%08x) > caf(%08x) mfn=%lx\n",
+ printk("taf(%" PRtype_info ") > caf(%08x) mfn=%lx\n",
page->u.inuse.type_info, page->count_info, mfn);
errors++;
}
@@ -763,8 +771,8 @@
(page_type == PGT_writable_page) &&
!(page->u.inuse.type_info & PGT_validated) )
{
- printk("shadow mode writable page not validated mfn=%lx "
- "t=%08x c=%08x\n",
+ printk("shadow mode writable page not validated mfn=%lx "
+ "t=%" PRtype_info " c=%08x\n",
mfn, page->u.inuse.type_info, page->count_info);
errors++;
}
@@ -774,7 +782,7 @@
(page->u.inuse.type_info & PGT_count_mask) > 1 )
{
printk("writeable page with type count >1: "
- "mfn=%lx t=%08x c=%08x\n",
+ "mfn=%lx t=%" PRtype_info " c=%08x\n",
mfn,
page->u.inuse.type_info,
page->count_info );
@@ -786,7 +794,7 @@
if ( page_type == PGT_none &&
(page->u.inuse.type_info & PGT_count_mask) > 0 )
{
- printk("normal page with type count >0: mfn=%lx t=%08x c=%08x\n",
+ printk("normal page with type count >0: mfn=%lx t=%" PRtype_info " c=%08x\n",
mfn,
page->u.inuse.type_info,
page->count_info );
@@ -812,7 +820,7 @@
: !(page_type && (page_type <= PGT_l4_page_table)) )
{
printk("out of sync page mfn=%lx has strange type "
- "t=%08x c=%08x\n",
+ "t=%" PRtype_info " c=%08x\n",
mfn, page->u.inuse.type_info, page->count_info);
errors++;
}
@@ -850,7 +858,7 @@
case PGT_l4_page_table:
if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
{
- printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
+ printk("Audit %d: type count!=0 t=%" PRtype_info " ot=%x c=%x mfn=%lx\n",
d->domain_id, page->u.inuse.type_info,
page->tlbflush_timestamp,
page->count_info, mfn);
@@ -864,7 +872,7 @@
case PGT_ldt_page:
if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
{
- printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
+ printk("Audit %d: type count!=0 t=%" PRtype_info " ot=%x c=%x mfn=%lx\n",
d->domain_id, page->u.inuse.type_info,
page->tlbflush_timestamp,
page->count_info, mfn);
@@ -877,7 +885,7 @@
if ( (page->count_info & PGC_count_mask) != 1 )
{
- printk("Audit %d: gen count!=1 (c=%x) t=%x ot=%x mfn=%lx\n",
+ printk("Audit %d: gen count!=1 (c=%x) t=%" PRtype_info " ot=%x mfn=%lx\n",
d->domain_id,
page->count_info,
page->u.inuse.type_info,
@@ -913,7 +921,7 @@
(page->count_info != 0) )
{
printk("Audit %d: shadow page counts wrong "
- "mfn=%lx t=%08x c=%08x\n",
+ "mfn=%lx t=%" PRtype_info " c=%08x\n",
d->domain_id, page_to_pfn(page),
page->u.inuse.type_info,
page->count_info);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/cpu/amd.c Thu Aug 25 22:53:20 2005
@@ -8,6 +8,20 @@
#include <asm/processor.h>
#include "cpu.h"
+
+/*
+ * amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush
+ * filter on AMD 64-bit processors.
+ */
+static int flush_filter_force;
+static void flush_filter(char *s)
+{
+ if (!strcmp(s, "off"))
+ flush_filter_force = -1;
+ if (!strcmp(s, "on"))
+ flush_filter_force = 1;
+}
+custom_param("amd_flush_filter", flush_filter);
#define num_physpages 0
@@ -25,7 +39,7 @@
*/
extern void vide(void);
-__asm__(".align 4\nvide: ret");
+__asm__(".text\n.align 4\nvide: ret");
static void __init init_amd(struct cpuinfo_x86 *c)
{
@@ -190,6 +204,21 @@
case 6:
set_bit(X86_FEATURE_K7, c->x86_capability);
break;
+ }
+
+ if (c->x86 == 15) {
+ rdmsr(MSR_K7_HWCR, l, h);
+ printk(KERN_INFO "CPU%d: AMD Flush Filter %sabled",
+ smp_processor_id(), (l & (1<<6)) ? "dis" : "en");
+ if ((flush_filter_force > 0) && (l & (1<<6))) {
+ l &= ~(1<<6);
+ printk(" -> Forcibly enabled");
+ } else if ((flush_filter_force < 0) && !(l & (1<<6))) {
+ l |= 1<<6;
+ printk(" -> Forcibly disabled");
+ }
+ wrmsr(MSR_K7_HWCR, l, h);
+ printk("\n");
}
display_cacheinfo(c);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/dom0_ops.c Thu Aug 25 22:53:20 2005
@@ -404,15 +404,17 @@
memcpy(c, &v->arch.guest_context, sizeof(*c));
- /* IOPL privileges are virtualised -- merge back into returned eflags. */
- BUG_ON((c->user_regs.eflags & EF_IOPL) != 0);
- c->user_regs.eflags |= v->arch.iopl << 12;
-
if ( VMX_DOMAIN(v) )
{
save_vmx_cpu_user_regs(&c->user_regs);
__vmread(CR0_READ_SHADOW, &c->ctrlreg[0]);
__vmread(CR4_READ_SHADOW, &c->ctrlreg[4]);
+ }
+ else
+ {
+ /* IOPL privileges are virtualised: merge back into returned eflags. */
+ BUG_ON((c->user_regs.eflags & EF_IOPL) != 0);
+ c->user_regs.eflags |= v->arch.iopl << 12;
}
c->flags = 0;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/domain.c Thu Aug 25 22:53:20 2005
@@ -48,6 +48,8 @@
struct percpu_ctxt {
struct vcpu *curr_vcpu;
+ unsigned int context_not_finalised;
+ unsigned int dirty_segment_mask;
} __cacheline_aligned;
static struct percpu_ctxt percpu_ctxt[NR_CPUS];
@@ -190,7 +192,7 @@
{
list_for_each_entry ( page, &d->page_list, list )
{
- printk("Page %p: caf=%08x, taf=%08x\n",
+ printk("Page %p: caf=%08x, taf=%" PRtype_info "\n",
_p(page_to_phys(page)), page->count_info,
page->u.inuse.type_info);
}
@@ -198,14 +200,14 @@
list_for_each_entry ( page, &d->xenpage_list, list )
{
- printk("XenPage %p: caf=%08x, taf=%08x\n",
+ printk("XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
_p(page_to_phys(page)), page->count_info,
page->u.inuse.type_info);
}
page = virt_to_page(d->shared_info);
- printk("Shared_info@%p: caf=%08x, taf=%08x\n",
+ printk("Shared_info@%p: caf=%08x, taf=%" PRtype_info "\n",
_p(page_to_phys(page)), page->count_info,
page->u.inuse.type_info);
}
@@ -215,8 +217,16 @@
return xmalloc(struct vcpu);
}
+/* We assume that vcpu 0 is always the last one to be freed in a
+ domain i.e. if v->vcpu_id == 0, the domain should be
+ single-processor. */
void arch_free_vcpu_struct(struct vcpu *v)
{
+ struct vcpu *p;
+ for_each_vcpu(v->domain, p) {
+ if (p->next_in_list == v)
+ p->next_in_list = v->next_in_list;
+ }
xfree(v);
}
@@ -295,26 +305,23 @@
l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
}
+void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
+{
+ if ( v->processor == newcpu )
+ return;
+
+ set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+ v->processor = newcpu;
+
+ if ( VMX_DOMAIN(v) )
+ {
+ __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
+ v->arch.schedule_tail = arch_vmx_do_relaunch;
+ }
+}
+
#ifdef CONFIG_VMX
static int vmx_switch_on;
-
-void arch_vmx_do_resume(struct vcpu *v)
-{
- u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
- load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
- vmx_do_resume(v);
- reset_stack_and_jump(vmx_asm_do_resume);
-}
-
-void arch_vmx_do_launch(struct vcpu *v)
-{
- u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
- load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
- vmx_do_launch(v);
- reset_stack_and_jump(vmx_asm_do_launch);
-}
static int vmx_final_setup_guest(
struct vcpu *v, struct vcpu_guest_context *ctxt)
@@ -346,7 +353,7 @@
v->arch.schedule_tail = arch_vmx_do_launch;
-#if defined (__i386)
+#if defined (__i386__)
v->domain->arch.vmx_platform.real_mode_data =
(unsigned long *) regs->esi;
#endif
@@ -404,7 +411,7 @@
{
if ( ((c->user_regs.cs & 3) == 0) ||
((c->user_regs.ss & 3) == 0) )
- return -EINVAL;
+ return -EINVAL;
}
clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
@@ -458,7 +465,7 @@
if ( !(c->flags & VGCF_VMX_GUEST) )
#endif
if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d,
- PGT_base_page_table) )
+ PGT_base_page_table) )
return -EINVAL;
}
@@ -479,7 +486,10 @@
}
update_pagetables(v);
-
+
+ if ( v->vcpu_id == 0 )
+ init_domain_time(d);
+
/* Don't redo final setup */
set_bit(_VCPUF_initialised, &v->vcpu_flags);
@@ -541,51 +551,59 @@
__r; })
#if CONFIG_VMX
-#define load_msrs(_p, _n) if (vmx_switch_on) vmx_load_msrs((_p), (_n))
+#define load_msrs(n) if (vmx_switch_on) vmx_load_msrs(n)
#else
-#define load_msrs(_p, _n) ((void)0)
+#define load_msrs(n) ((void)0)
#endif
-static void load_segments(struct vcpu *p, struct vcpu *n)
-{
- struct vcpu_guest_context *pctxt = &p->arch.guest_context;
+/*
+ * save_segments() writes a mask of segments which are dirty (non-zero),
+ * allowing load_segments() to avoid some expensive segment loads and
+ * MSR writes.
+ */
+#define DIRTY_DS 0x01
+#define DIRTY_ES 0x02
+#define DIRTY_FS 0x04
+#define DIRTY_GS 0x08
+#define DIRTY_FS_BASE 0x10
+#define DIRTY_GS_BASE_USER 0x20
+
+static void load_segments(struct vcpu *n)
+{
struct vcpu_guest_context *nctxt = &n->arch.guest_context;
int all_segs_okay = 1;
+ unsigned int dirty_segment_mask, cpu = smp_processor_id();
+
+ /* Load and clear the dirty segment mask. */
+ dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
+ percpu_ctxt[cpu].dirty_segment_mask = 0;
/* Either selector != 0 ==> reload. */
- if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+ if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
/* Either selector != 0 ==> reload. */
- if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+ if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
/*
* Either selector != 0 ==> reload.
* Also reload to reset FS_BASE if it was non-zero.
*/
- if ( unlikely(pctxt->user_regs.fs |
- pctxt->fs_base |
+ if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
nctxt->user_regs.fs) )
- {
all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
- if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
- pctxt->fs_base = 0;
- }
/*
* Either selector != 0 ==> reload.
* Also reload to reset GS_BASE if it was non-zero.
*/
- if ( unlikely(pctxt->user_regs.gs |
- pctxt->gs_base_user |
+ if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
nctxt->user_regs.gs) )
{
/* Reset GS_BASE with user %gs? */
- if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+ if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
- if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
- pctxt->gs_base_user = 0;
}
/* This can only be non-zero if selector is NULL. */
@@ -650,7 +668,9 @@
static void save_segments(struct vcpu *v)
{
- struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
+ struct vcpu_guest_context *ctxt = &v->arch.guest_context;
+ struct cpu_user_regs *regs = &ctxt->user_regs;
+ unsigned int dirty_segment_mask = 0;
if ( VMX_DOMAIN(v) )
rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
@@ -659,18 +679,34 @@
__asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
__asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
__asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
-}
-
-static void clear_segments(void)
-{
- __asm__ __volatile__ (
- " movl %0,%%ds; "
- " movl %0,%%es; "
- " movl %0,%%fs; "
- " movl %0,%%gs; "
- ""safe_swapgs" "
- " movl %0,%%gs"
- : : "r" (0) );
+
+ if ( regs->ds )
+ dirty_segment_mask |= DIRTY_DS;
+
+ if ( regs->es )
+ dirty_segment_mask |= DIRTY_ES;
+
+ if ( regs->fs )
+ {
+ dirty_segment_mask |= DIRTY_FS;
+ ctxt->fs_base = 0; /* != 0 selector kills fs_base */
+ }
+ else if ( ctxt->fs_base )
+ {
+ dirty_segment_mask |= DIRTY_FS_BASE;
+ }
+
+ if ( regs->gs )
+ {
+ dirty_segment_mask |= DIRTY_GS;
+ ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
+ }
+ else if ( ctxt->gs_base_user )
+ {
+ dirty_segment_mask |= DIRTY_GS_BASE_USER;
+ }
+
+ percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
}
long do_switch_to_user(void)
@@ -706,10 +742,9 @@
#elif defined(__i386__)
-#define load_segments(_p, _n) ((void)0)
-#define load_msrs(_p, _n) ((void)0)
-#define save_segments(_p) ((void)0)
-#define clear_segments() ((void)0)
+#define load_segments(n) ((void)0)
+#define load_msrs(n) ((void)0)
+#define save_segments(p) ((void)0)
static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
{
@@ -726,9 +761,9 @@
static void __context_switch(void)
{
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
- unsigned int cpu = smp_processor_id();
- struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
- struct vcpu *n = current;
+ unsigned int cpu = smp_processor_id();
+ struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
+ struct vcpu *n = current;
if ( !is_idle_task(p->domain) )
{
@@ -786,23 +821,31 @@
void context_switch(struct vcpu *prev, struct vcpu *next)
{
- struct vcpu *realprev;
-
- local_irq_disable();
+ unsigned int cpu = smp_processor_id();
+
+ ASSERT(!local_irq_is_enabled());
set_current(next);
- if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) ||
- is_idle_task(next->domain) )
- {
- local_irq_enable();
- }
- else
+ if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
{
__context_switch();
-
- local_irq_enable();
-
+ percpu_ctxt[cpu].context_not_finalised = 1;
+ }
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+ unsigned int cpu = smp_processor_id();
+
+ ASSERT(local_irq_is_enabled());
+
+ if ( percpu_ctxt[cpu].context_not_finalised )
+ {
+ percpu_ctxt[cpu].context_not_finalised = 0;
+
+ BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+
if ( VMX_DOMAIN(next) )
{
vmx_restore_msrs(next);
@@ -810,18 +853,10 @@
else
{
load_LDT(next);
- load_segments(realprev, next);
- load_msrs(realprev, next);
- }
- }
-
- /*
- * We do this late on because it doesn't need to be protected by the
- * schedule_lock, and because we want this to be the very last use of
- * 'prev' (after this point, a dying domain's info structure may be freed
- * without warning).
- */
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
+ load_segments(next);
+ load_msrs(next);
+ }
+ }
schedule_tail(next);
BUG();
@@ -835,12 +870,19 @@
int __sync_lazy_execstate(void)
{
- if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
- return 0;
- __context_switch();
- load_LDT(current);
- clear_segments();
- return 1;
+ unsigned long flags;
+ int switch_required;
+
+ local_irq_save(flags);
+
+ switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
+
+ if ( switch_required )
+ __context_switch();
+
+ local_irq_restore(flags);
+
+ return switch_required;
}
void sync_lazy_execstate_cpu(unsigned int cpu)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/domain_build.c Thu Aug 25 22:53:20 2005
@@ -22,16 +22,28 @@
#include <asm/i387.h>
#include <asm/shadow.h>
-/* opt_dom0_mem: memory allocated to domain 0. */
-static unsigned int opt_dom0_mem;
+static long dom0_nrpages;
+
+/*
+ * dom0_mem:
+ * If +ve:
+ * * The specified amount of memory is allocated to domain 0.
+ * If -ve:
+ * * All of memory is allocated to domain 0, minus the specified amount.
+ * If not specified:
+ * * All of memory is allocated to domain 0, minus 1/16th which is reserved
+ * for uses such as DMA buffers (the reservation is clamped to 128MB).
+ */
static void parse_dom0_mem(char *s)
{
- unsigned long long bytes = parse_size_and_unit(s);
- /* If no unit is specified we default to kB units, not bytes. */
- if ( isdigit(s[strlen(s)-1]) )
- opt_dom0_mem = (unsigned int)bytes;
- else
- opt_dom0_mem = (unsigned int)(bytes >> 10);
+ unsigned long long bytes;
+ char *t = s;
+ if ( *s == '-' )
+ t++;
+ bytes = parse_size_and_unit(t);
+ dom0_nrpages = bytes >> PAGE_SHIFT;
+ if ( *s == '-' )
+ dom0_nrpages = -dom0_nrpages;
}
custom_param("dom0_mem", parse_dom0_mem);
@@ -57,11 +69,21 @@
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)
-static struct pfn_info *alloc_largest(struct domain *d, unsigned long max)
+static struct pfn_info *alloc_chunk(struct domain *d, unsigned long max_pages)
{
struct pfn_info *page;
- unsigned int order = get_order(max * PAGE_SIZE);
- if ( (max & (max-1)) != 0 )
+ unsigned int order;
+ /*
+ * Allocate up to 2MB at a time:
+ * 1. This prevents overflow of get_order() when allocating more than
+ * 4GB to domain 0 on a PAE machine.
+ * 2. It prevents allocating very large chunks from DMA pools before
+ * the >4GB pool is fully depleted.
+ */
+ if ( max_pages > (2UL << (20 - PAGE_SHIFT)) )
+ max_pages = 2UL << (20 - PAGE_SHIFT);
+ order = get_order(max_pages << PAGE_SHIFT);
+ if ( (max_pages & (max_pages-1)) != 0 )
order--;
while ( (page = alloc_domheap_pages(d, order, 0)) == NULL )
if ( order-- == 0 )
@@ -74,12 +96,12 @@
unsigned long _initrd_start, unsigned long initrd_len,
char *cmdline)
{
- int i, rc, dom0_pae, xen_pae;
+ int i, rc, dom0_pae, xen_pae, order;
unsigned long pfn, mfn;
unsigned long nr_pages;
unsigned long nr_pt_pages;
- unsigned long alloc_start;
- unsigned long alloc_end;
+ unsigned long alloc_spfn;
+ unsigned long alloc_epfn;
unsigned long count;
struct pfn_info *page = NULL;
start_info_t *si;
@@ -137,16 +159,30 @@
printk("*** LOADING DOMAIN 0 ***\n");
- /* By default DOM0 is allocated all available memory. */
d->max_pages = ~0U;
- if ( (nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10)) == 0 )
+
+ /*
+ * If domain 0 allocation isn't specified, reserve 1/16th of available
+ * memory for things like DMA buffers. This reservation is clamped to
+ * a maximum of 128MB.
+ */
+ if ( dom0_nrpages == 0 )
+ {
+ dom0_nrpages = avail_domheap_pages() +
+ ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
+ ((image_len + PAGE_SIZE - 1) >> PAGE_SHIFT);
+ dom0_nrpages = min(dom0_nrpages / 16, 128L << (20 - PAGE_SHIFT));
+ dom0_nrpages = -dom0_nrpages;
+ }
+
+ /* Negative memory specification means "all memory - specified amount". */
+ if ( dom0_nrpages < 0 )
nr_pages = avail_domheap_pages() +
((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
- ((image_len + PAGE_SIZE - 1) >> PAGE_SHIFT);
- if ( (page = alloc_largest(d, nr_pages)) == NULL )
- panic("Not enough RAM for DOM0 reservation.\n");
- alloc_start = page_to_phys(page);
- alloc_end = alloc_start + (d->tot_pages << PAGE_SHIFT);
+ ((image_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
+ dom0_nrpages;
+ else
+ nr_pages = dom0_nrpages;
if ( (rc = parseelfimage(&dsi)) != 0 )
return rc;
@@ -166,7 +202,7 @@
return -EINVAL;
}
if (strstr(dsi.xen_section_string, "SHADOW=translate"))
- opt_dom0_translate = 1;
+ opt_dom0_translate = 1;
/* Align load address to 4MB boundary. */
dsi.v_start &= ~((1UL<<22)-1);
@@ -215,12 +251,19 @@
#endif
}
- if ( (v_end - dsi.v_start) > (alloc_end - alloc_start) )
- panic("Insufficient contiguous RAM to build kernel image.\n");
+ order = get_order(v_end - dsi.v_start);
+ if ( (1UL << order) > nr_pages )
+ panic("Domain 0 allocation is too small for kernel image.\n");
+
+ /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
+ if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
+ panic("Not enough RAM for domain 0 allocation.\n");
+ alloc_spfn = page_to_pfn(page);
+ alloc_epfn = alloc_spfn + d->tot_pages;
printk("PHYSICAL MEMORY ARRANGEMENT:\n"
- " Dom0 alloc.: %p->%p",
- _p(alloc_start), _p(alloc_end));
+ " Dom0 alloc.: %"PRIphysaddr"->%"PRIphysaddr,
+ pfn_to_phys(alloc_spfn), pfn_to_phys(alloc_epfn));
if ( d->tot_pages < nr_pages )
printk(" (%lu pages to be allocated)",
nr_pages - d->tot_pages);
@@ -249,7 +292,8 @@
return -ENOMEM;
}
- mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
+ mpt_alloc = (vpt_start - dsi.v_start) +
+ (unsigned long)pfn_to_phys(alloc_spfn);
/*
* We're basically forcing default RPLs to 1, so that our "what privilege
@@ -306,7 +350,7 @@
#endif
l2tab += l2_linear_offset(dsi.v_start);
- mfn = alloc_start >> PAGE_SHIFT;
+ mfn = alloc_spfn;
for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
{
if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
@@ -428,7 +472,7 @@
v->arch.guest_table = mk_pagetable(__pa(l4start));
l4tab += l4_table_offset(dsi.v_start);
- mfn = alloc_start >> PAGE_SHIFT;
+ mfn = alloc_spfn;
for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
{
if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
@@ -563,24 +607,24 @@
/* Write the phys->machine and machine->phys table entries. */
for ( pfn = 0; pfn < d->tot_pages; pfn++ )
{
- mfn = pfn + (alloc_start>>PAGE_SHIFT);
+ mfn = pfn + alloc_spfn;
#ifndef NDEBUG
#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
if ( !opt_dom0_translate && (pfn > REVERSE_START) )
- mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
+ mfn = alloc_epfn - (pfn - REVERSE_START);
#endif
((u32 *)vphysmap_start)[pfn] = mfn;
machine_to_phys_mapping[mfn] = pfn;
}
while ( pfn < nr_pages )
{
- if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL )
+ if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL )
panic("Not enough RAM for DOM0 reservation.\n");
while ( pfn < d->tot_pages )
{
mfn = page_to_pfn(page);
#ifndef NDEBUG
-#define pfn (nr_pages - 1 - (pfn - ((alloc_end - alloc_start) >> PAGE_SHIFT)))
+#define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
#endif
((u32 *)vphysmap_start)[pfn] = mfn;
machine_to_phys_mapping[mfn] = pfn;
@@ -614,19 +658,21 @@
/* DOM0 gets access to everything. */
physdev_init_dom0(d);
+ init_domain_time(d);
+
set_bit(_DOMF_constructed, &d->domain_flags);
new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start);
if ( opt_dom0_shadow || opt_dom0_translate )
{
- printk("dom0: shadow enable\n");
+ printk("dom0: shadow enable\n");
shadow_mode_enable(d, (opt_dom0_translate
? SHM_enable | SHM_refcounts | SHM_translate
: SHM_enable));
if ( opt_dom0_translate )
{
- printk("dom0: shadow translate\n");
+ printk("dom0: shadow translate\n");
#if defined(__i386__) && defined(CONFIG_X86_PAE)
printk("FIXME: PAE code needed here: %s:%d (%s)\n",
__FILE__, __LINE__, __FUNCTION__);
@@ -659,7 +705,7 @@
}
update_pagetables(v); /* XXX SMP */
- printk("dom0: shadow setup done\n");
+ printk("dom0: shadow setup done\n");
}
return 0;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/io_apic.c Thu Aug 25 22:53:20 2005
@@ -1751,8 +1751,30 @@
pin = (address - 0x10) >> 1;
+ *(u32 *)&rte = val;
rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- *(int *)&rte = val;
+
+ /*
+ * What about weird destination types?
+ * SMI: Ignore? Ought to be set up by the BIOS.
+ * NMI: Ignore? Watchdog functionality is Xen's concern.
+ * INIT: Definitely ignore: probably a guest OS bug.
+ * ExtINT: Ignore? Linux only asserts this at start of day.
+ * For now, print a message and return an error. We can fix up on demand.
+ */
+ if ( rte.delivery_mode > dest_LowestPrio )
+ {
+ printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
+ printk(" APIC=%d/%d, lo-reg=%x\n", apicid, pin, val);
+ return -EINVAL;
+ }
+
+ /*
+ * The guest does not know physical APIC arrangement (flat vs. cluster).
+ * Apply genapic conventions for this platform.
+ */
+ rte.delivery_mode = INT_DELIVERY_MODE;
+ rte.dest_mode = INT_DEST_MODE;
if ( rte.vector >= FIRST_DEVICE_VECTOR )
{
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/mm.c Thu Aug 25 22:53:20 2005
@@ -95,6 +95,7 @@
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
+#include <xen/event.h>
#include <asm/shadow.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
@@ -122,7 +123,7 @@
static void free_l1_table(struct pfn_info *page);
static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
- unsigned int type);
+ unsigned long type);
static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
/* Used to defer flushing of memory structures. */
@@ -138,7 +139,7 @@
* Returns the current foreign domain; defaults to the currently-executing
* domain if a foreign override hasn't been specified.
*/
-#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ? : current->domain)
+#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ?: current->domain)
/* Private domain structs for DOMID_XEN and DOMID_IO. */
static struct domain *dom_xen, *dom_io;
@@ -354,7 +355,7 @@
static int get_page_and_type_from_pagenr(unsigned long page_nr,
- u32 type,
+ unsigned long type,
struct domain *d)
{
struct pfn_info *page = &frame_table[page_nr];
@@ -365,7 +366,7 @@
if ( unlikely(!get_page_type(page, type)) )
{
if ( (type & PGT_type_mask) != PGT_l1_page_table )
- MEM_LOG("Bad page type for pfn %lx (%08x)",
+ MEM_LOG("Bad page type for pfn %lx (%" PRtype_info ")",
page_nr, page->u.inuse.type_info);
put_page(page);
return 0;
@@ -390,7 +391,7 @@
get_linear_pagetable(
root_pgentry_t re, unsigned long re_pfn, struct domain *d)
{
- u32 x, y;
+ unsigned long x, y;
struct pfn_info *page;
unsigned long pfn;
@@ -443,7 +444,7 @@
if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
{
- MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
+ MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
return 0;
}
@@ -489,7 +490,7 @@
if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
+ MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
return 0;
}
@@ -522,7 +523,7 @@
if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
+ MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
return 0;
}
@@ -544,7 +545,8 @@
static int
get_page_from_l4e(
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
+ l4_pgentry_t l4e, unsigned long pfn,
+ struct domain *d, unsigned long vaddr)
{
int rc;
@@ -555,12 +557,15 @@
if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
+ MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
return 0;
}
+ vaddr >>= L4_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
rc = get_page_and_type_from_pagenr(
- l4e_get_pfn(l4e), PGT_l3_page_table, d);
+ l4e_get_pfn(l4e),
+ PGT_l3_page_table | vaddr, d);
if ( unlikely(!rc) )
return get_linear_pagetable(l4e, pfn, d);
@@ -731,7 +736,7 @@
pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ?
l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR) :
- l2e_empty();
+ l2e_empty();
unmap_domain_page(pl2e);
return 1;
@@ -750,13 +755,47 @@
return 1;
}
+#elif CONFIG_X86_64
+# define create_pae_xen_mappings(pl3e) (1)
+
+static inline int l1_backptr(
+ unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
+{
+ unsigned long l2_backptr = l2_type & PGT_va_mask;
+ BUG_ON(l2_backptr == PGT_va_unknown);
+
+ *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) |
+ (offset_in_l2 << L2_PAGETABLE_SHIFT);
+ return 1;
+}
+
+static inline int l2_backptr(
+ unsigned long *backptr, unsigned long offset_in_l3, unsigned long l3_type)
+{
+ unsigned long l3_backptr = l3_type & PGT_va_mask;
+ BUG_ON(l3_backptr == PGT_va_unknown);
+
+ *backptr = ((l3_backptr >> PGT_va_shift) << L4_PAGETABLE_SHIFT) |
+ (offset_in_l3 << L3_PAGETABLE_SHIFT);
+ return 1;
+}
+
+static inline int l3_backptr(
+ unsigned long *backptr, unsigned long offset_in_l4, unsigned long l4_type)
+{
+ unsigned long l4_backptr = l4_type & PGT_va_mask;
+ BUG_ON(l4_backptr == PGT_va_unknown);
+
+ *backptr = (offset_in_l4 << L4_PAGETABLE_SHIFT);
+ return 1;
+}
#else
# define create_pae_xen_mappings(pl3e) (1)
# define l1_backptr(bp,l2o,l2t) \
({ *(bp) = (unsigned long)(l2o) << L2_PAGETABLE_SHIFT; 1; })
#endif
-static int alloc_l2_table(struct pfn_info *page, unsigned int type)
+static int alloc_l2_table(struct pfn_info *page, unsigned long type)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_pfn(page);
@@ -808,7 +847,7 @@
#if CONFIG_PAGING_LEVELS >= 3
-static int alloc_l3_table(struct pfn_info *page)
+static int alloc_l3_table(struct pfn_info *page, unsigned long type)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_pfn(page);
@@ -818,10 +857,23 @@
ASSERT(!shadow_mode_refcounts(d));
+#ifdef CONFIG_X86_PAE
+ if ( pfn >= 0x100000 )
+ {
+ MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
+ return 0;
+ }
+#endif
+
pl3e = map_domain_page(pfn);
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
{
+#if CONFIG_PAGING_LEVELS >= 4
+ if ( !l2_backptr(&vaddr, i, type) )
+ goto fail;
+#else
vaddr = (unsigned long)i << L3_PAGETABLE_SHIFT;
+#endif
if ( is_guest_l3_slot(i) &&
unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
goto fail;
@@ -842,15 +894,16 @@
return 0;
}
#else
-#define alloc_l3_table(page) (0)
+#define alloc_l3_table(page, type) (0)
#endif
#if CONFIG_PAGING_LEVELS >= 4
-static int alloc_l4_table(struct pfn_info *page)
+static int alloc_l4_table(struct pfn_info *page, unsigned long type)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_pfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
+ unsigned long vaddr;
int i;
/* See the code in shadow_promote() to understand why this is here. */
@@ -860,9 +913,14 @@
ASSERT(!shadow_mode_refcounts(d));
for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
+ {
+ if ( !l3_backptr(&vaddr, i, type) )
+ goto fail;
+
if ( is_guest_l4_slot(i) &&
- unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
+ unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) )
goto fail;
+ }
/* Xen private mappings. */
memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
@@ -885,7 +943,7 @@
return 0;
}
#else
-#define alloc_l4_table(page) (0)
+#define alloc_l4_table(page, type) (0)
#endif
@@ -967,7 +1025,7 @@
unlikely(o != l1e_get_intpte(ol1e)) )
{
MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
- ": saw %" PRIpte "\n",
+ ": saw %" PRIpte,
l1e_get_intpte(ol1e),
l1e_get_intpte(nl1e),
o);
@@ -993,7 +1051,7 @@
{
if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
{
- MEM_LOG("Bad L1 flags %x\n",
+ MEM_LOG("Bad L1 flags %x",
l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
return 0;
}
@@ -1037,10 +1095,10 @@
static int mod_l2_entry(l2_pgentry_t *pl2e,
l2_pgentry_t nl2e,
unsigned long pfn,
- unsigned int type)
+ unsigned long type)
{
l2_pgentry_t ol2e;
- unsigned long vaddr;
+ unsigned long vaddr = 0;
if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
{
@@ -1055,7 +1113,7 @@
{
if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
{
- MEM_LOG("Bad L2 flags %x\n",
+ MEM_LOG("Bad L2 flags %x",
l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
return 0;
}
@@ -1074,10 +1132,9 @@
return 0;
}
}
- else
- {
- if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
- return 0;
+ else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
+ {
+ return 0;
}
put_page_from_l2e(ol2e, pfn);
@@ -1090,7 +1147,8 @@
/* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
static int mod_l3_entry(l3_pgentry_t *pl3e,
l3_pgentry_t nl3e,
- unsigned long pfn)
+ unsigned long pfn,
+ unsigned long type)
{
l3_pgentry_t ol3e;
unsigned long vaddr;
@@ -1117,7 +1175,7 @@
{
if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
{
- MEM_LOG("Bad L3 flags %x\n",
+ MEM_LOG("Bad L3 flags %x",
l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
return 0;
}
@@ -1126,28 +1184,29 @@
if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e);
+#if CONFIG_PAGING_LEVELS >= 4
+ if ( unlikely(!l2_backptr(&vaddr, pgentry_ptr_to_slot(pl3e), type)) ||
+ unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
+ return 0;
+#else
vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
<< L3_PAGETABLE_SHIFT;
if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
return 0;
+#endif
if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
{
- BUG_ON(!create_pae_xen_mappings(pl3e));
put_page_from_l3e(nl3e, pfn);
return 0;
}
-
- put_page_from_l3e(ol3e, pfn);
- return 1;
- }
-
- if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
- {
- BUG_ON(!create_pae_xen_mappings(pl3e));
+ }
+ else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+ {
return 0;
}
+ BUG_ON(!create_pae_xen_mappings(pl3e));
put_page_from_l3e(ol3e, pfn);
return 1;
}
@@ -1159,9 +1218,11 @@
/* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
static int mod_l4_entry(l4_pgentry_t *pl4e,
l4_pgentry_t nl4e,
- unsigned long pfn)
+ unsigned long pfn,
+ unsigned long type)
{
l4_pgentry_t ol4e;
+ unsigned long vaddr;
if ( unlikely(!is_guest_l4_slot(pgentry_ptr_to_slot(pl4e))) )
{
@@ -1176,7 +1237,7 @@
{
if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
{
- MEM_LOG("Bad L4 flags %x\n",
+ MEM_LOG("Bad L4 flags %x",
l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
return 0;
}
@@ -1185,7 +1246,8 @@
if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e);
- if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
+ if ( unlikely(!l3_backptr(&vaddr, pgentry_ptr_to_slot(pl4e), type)) ||
+ unlikely(!get_page_from_l4e(nl4e, pfn, current->domain, vaddr)) )
return 0;
if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
@@ -1193,13 +1255,11 @@
put_page_from_l4e(nl4e, pfn);
return 0;
}
-
- put_page_from_l4e(ol4e, pfn);
- return 1;
- }
-
- if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
+ }
+ else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
+ {
return 0;
+ }
put_page_from_l4e(ol4e, pfn);
return 1;
@@ -1207,7 +1267,7 @@
#endif
-int alloc_page_type(struct pfn_info *page, unsigned int type)
+int alloc_page_type(struct pfn_info *page, unsigned long type)
{
switch ( type & PGT_type_mask )
{
@@ -1216,14 +1276,14 @@
case PGT_l2_page_table:
return alloc_l2_table(page, type);
case PGT_l3_page_table:
- return alloc_l3_table(page);
+ return alloc_l3_table(page, type);
case PGT_l4_page_table:
- return alloc_l4_table(page);
+ return alloc_l4_table(page, type);
case PGT_gdt_page:
case PGT_ldt_page:
return alloc_segdesc_page(page);
default:
- printk("Bad type in alloc_page_type %x t=%x c=%x\n",
+ printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
type, page->u.inuse.type_info,
page->count_info);
BUG();
@@ -1233,7 +1293,7 @@
}
-void free_page_type(struct pfn_info *page, unsigned int type)
+void free_page_type(struct pfn_info *page, unsigned long type)
{
struct domain *owner = page_get_owner(page);
unsigned long gpfn;
@@ -1273,7 +1333,7 @@
#endif
default:
- printk("%s: type %x pfn %lx\n",__FUNCTION__,
+ printk("%s: type %lx pfn %lx\n",__FUNCTION__,
type, page_to_pfn(page));
BUG();
}
@@ -1282,7 +1342,7 @@
void put_page_type(struct pfn_info *page)
{
- u32 nx, x, y = page->u.inuse.type_info;
+ unsigned long nx, x, y = page->u.inuse.type_info;
again:
do {
@@ -1335,9 +1395,9 @@
}
-int get_page_type(struct pfn_info *page, u32 type)
-{
- u32 nx, x, y = page->u.inuse.type_info;
+int get_page_type(struct pfn_info *page, unsigned long type)
+{
+ unsigned long nx, x, y = page->u.inuse.type_info;
again:
do {
@@ -1388,8 +1448,11 @@
{
if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
((type & PGT_type_mask) != PGT_l1_page_table) )
- MEM_LOG("Bad type (saw %08x != exp %08x) for pfn %lx",
- x, type, page_to_pfn(page));
+ MEM_LOG("Bad type (saw %" PRtype_info
+ "!= exp %" PRtype_info ") "
+ "for mfn %lx (pfn %x)",
+ x, type, page_to_pfn(page),
+ machine_to_phys_mapping[page_to_pfn(page)]);
return 0;
}
else if ( (x & PGT_va_mask) == PGT_va_mutable )
@@ -1427,8 +1490,8 @@
/* Try to validate page type; drop the new reference on failure. */
if ( unlikely(!alloc_page_type(page, type)) )
{
- MEM_LOG("Error while validating pfn %lx for type %08x."
- " caf=%08x taf=%08x",
+ MEM_LOG("Error while validating pfn %lx for type %" PRtype_info "."
+ " caf=%08x taf=%" PRtype_info,
page_to_pfn(page), type,
page->count_info,
page->u.inuse.type_info);
@@ -1537,7 +1600,7 @@
percpu_info[cpu].foreign = dom_io;
break;
default:
- MEM_LOG("Dom %u cannot set foreign dom\n", d->domain_id);
+ MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
okay = 0;
break;
}
@@ -1596,7 +1659,7 @@
{
struct mmuext_op op;
int rc = 0, i = 0, okay, cpu = smp_processor_id();
- unsigned int type, done = 0;
+ unsigned long type, done = 0;
struct pfn_info *page;
struct vcpu *v = current;
struct domain *d = v->domain, *e;
@@ -1674,16 +1737,16 @@
#ifndef CONFIG_X86_PAE /* Unsafe on PAE because of Xen-private mappings. */
case MMUEXT_PIN_L2_TABLE:
- type = PGT_l2_page_table;
+ type = PGT_l2_page_table | PGT_va_mutable;
goto pin_page;
#endif
case MMUEXT_PIN_L3_TABLE:
- type = PGT_l3_page_table;
+ type = PGT_l3_page_table | PGT_va_mutable;
goto pin_page;
case MMUEXT_PIN_L4_TABLE:
- type = PGT_l4_page_table;
+ type = PGT_l4_page_table | PGT_va_mutable;
goto pin_page;
case MMUEXT_UNPIN_TABLE:
@@ -1770,7 +1833,7 @@
case MMUEXT_FLUSH_CACHE:
if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
{
- MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
+ MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
okay = 0;
}
else
@@ -1784,7 +1847,7 @@
if ( shadow_mode_external(d) )
{
MEM_LOG("ignoring SET_LDT hypercall from external "
- "domain %u\n", d->domain_id);
+ "domain %u", d->domain_id);
okay = 0;
break;
}
@@ -1855,7 +1918,7 @@
unlikely(IS_XEN_HEAP_FRAME(page)) )
{
MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
- "page is in Xen heap (%lx), or dom is dying (%ld).\n",
+ "page is in Xen heap (%lx), or dom is dying (%ld).",
e->tot_pages, e->max_pages, op.mfn, e->domain_flags);
okay = 0;
goto reassign_fail;
@@ -1876,9 +1939,9 @@
unlikely(_nd != _d) )
{
MEM_LOG("Bad page values %lx: ed=%p(%u), sd=%p,"
- " caf=%08x, taf=%08x\n", page_to_pfn(page),
- d, d->domain_id, unpickle_domptr(_nd), x,
- page->u.inuse.type_info);
+ " caf=%08x, taf=%" PRtype_info,
+ page_to_pfn(page), d, d->domain_id,
+ unpickle_domptr(_nd), x, page->u.inuse.type_info);
okay = 0;
goto reassign_fail;
}
@@ -1951,7 +2014,7 @@
unsigned int cmd, done = 0;
struct vcpu *v = current;
struct domain *d = v->domain;
- u32 type_info;
+ unsigned long type_info;
struct domain_mmap_cache mapcache, sh_mapcache;
LOCK_BIGLOCK(d);
@@ -2041,7 +2104,8 @@
l1e = l1e_from_intpte(req.val);
okay = mod_l1_entry(va, l1e);
if ( okay && unlikely(shadow_mode_enabled(d)) )
- shadow_l1_normal_pt_update(d, req.ptr, l1e,
&sh_mapcache);
+ shadow_l1_normal_pt_update(
+ d, req.ptr, l1e, &sh_mapcache);
put_page_type(page);
}
break;
@@ -2054,24 +2118,28 @@
/* FIXME: doesn't work with PAE */
l2e = l2e_from_intpte(req.val);
- okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn,
type_info);
+ okay = mod_l2_entry(
+ (l2_pgentry_t *)va, l2e, mfn, type_info);
if ( okay && unlikely(shadow_mode_enabled(d)) )
- shadow_l2_normal_pt_update(d, req.ptr, l2e,
&sh_mapcache);
+ shadow_l2_normal_pt_update(
+ d, req.ptr, l2e, &sh_mapcache);
put_page_type(page);
}
break;
#if CONFIG_PAGING_LEVELS >= 3
case PGT_l3_page_table:
ASSERT( !shadow_mode_refcounts(d) );
- if ( likely(get_page_type(page, PGT_l3_page_table)) )
+ if ( likely(get_page_type(
+ page, type_info & (PGT_type_mask|PGT_va_mask))) )
{
l3_pgentry_t l3e;
/* FIXME: doesn't work with PAE */
l3e = l3e_from_intpte(req.val);
- okay = mod_l3_entry(va, l3e, mfn);
+ okay = mod_l3_entry(va, l3e, mfn, type_info);
if ( okay && unlikely(shadow_mode_enabled(d)) )
- shadow_l3_normal_pt_update(d, req.ptr, l3e,
&sh_mapcache);
+ shadow_l3_normal_pt_update(
+ d, req.ptr, l3e, &sh_mapcache);
put_page_type(page);
}
break;
@@ -2079,14 +2147,16 @@
#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
ASSERT( !shadow_mode_refcounts(d) );
- if ( likely(get_page_type(page, PGT_l4_page_table)) )
+ if ( likely(get_page_type(
+ page, type_info & (PGT_type_mask|PGT_va_mask))) )
{
l4_pgentry_t l4e;
l4e = l4e_from_intpte(req.val);
- okay = mod_l4_entry(va, l4e, mfn);
+ okay = mod_l4_entry(va, l4e, mfn, type_info);
if ( okay && unlikely(shadow_mode_enabled(d)) )
- shadow_l4_normal_pt_update(d, req.ptr, l4e,
&sh_mapcache);
+ shadow_l4_normal_pt_update(
+ d, req.ptr, l4e, &sh_mapcache);
put_page_type(page);
}
break;
@@ -2108,7 +2178,7 @@
}
}
- *(unsigned long *)va = req.val;
+ *(intpte_t *)va = req.val;
okay = 1;
if ( shadow_mode_enabled(d) )
@@ -2133,7 +2203,8 @@
if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
{
shadow_lock(FOREIGNDOM);
- printk("privileged guest dom%d requests pfn=%lx to map mfn=%lx
for dom%d\n",
+ printk("privileged guest dom%d requests pfn=%lx to "
+ "map mfn=%lx for dom%d\n",
d->domain_id, gpfn, mfn, FOREIGNDOM->domain_id);
set_machinetophys(mfn, gpfn);
set_p2m_entry(FOREIGNDOM, gpfn, mfn, &sh_mapcache, &mapcache);
@@ -2199,60 +2270,213 @@
return rc;
}
-/* This function assumes the caller is holding the domain's BIGLOCK
- * and is running in a shadow mode
- */
-int update_grant_va_mapping(unsigned long va,
- l1_pgentry_t _nl1e,
- struct domain *d,
- struct vcpu *v)
-{
- /* Caller must:
- * . own d's BIGLOCK
- * . already have 'get_page' correctly on the to-be-installed nl1e
- * . be responsible for flushing the TLB
- * . check PTE being installed isn't DISALLOWED
+
+/*
+ * Install the grant mapping _nl1e into the guest L1 pagetable entry located
+ * at guest pseudo-physical address pte_addr.
+ *
+ * The frame holding the PTE is found with __gpfn_to_mfn(d, ...); a general
+ * reference and an L1-pagetable type reference are taken on it for the
+ * duration of the update and dropped again before returning. The old entry
+ * is replaced via update_l1e() and its reference released with
+ * put_page_from_l1e(). If a shadow mode is enabled the shadow is updated too.
+ *
+ * Preconditions (ASSERTed): d's big lock is held, d is not in a refcounting
+ * shadow mode, and _nl1e carries no L1_DISALLOW_MASK flags.
+ * The 'v' argument is not used by this function.
+ *
+ * Returns GNTST_general_error on failure; otherwise GNTST_flush_all if the
+ * replaced entry was present, else GNTST_okay.
+ */
+int update_grant_pte_mapping(
+    unsigned long pte_addr, l1_pgentry_t _nl1e,
+    struct domain *d, struct vcpu *v)
+{
+    int rc = GNTST_okay;
+    void *va;
+    unsigned long gpfn, mfn;
+    struct pfn_info *page;
+    /*
+     * Same width as the 'type' argument of get_page_type() and as the
+     * type_info local in do_mmu_update(); a u32 could drop high bits.
+     */
+    unsigned long type_info;
+    l1_pgentry_t ol1e;
+
+    ASSERT(spin_is_locked(&d->big_lock));
+    ASSERT(!shadow_mode_refcounts(d));
+    ASSERT((l1e_get_flags(_nl1e) & L1_DISALLOW_MASK) == 0);
+
+    gpfn = pte_addr >> PAGE_SHIFT;
+    mfn = __gpfn_to_mfn(d, gpfn);
+
+    if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
+    {
+        MEM_LOG("Could not get page for normal update");
+        return GNTST_general_error;
+    }
+
+    /* Map the frame and point va at the PTE inside it. */
+    va = map_domain_page(mfn);
+    va = (void *)((unsigned long)va + (pte_addr & ~PAGE_MASK));
+    page = pfn_to_page(mfn);
+
+    /* Only an L1 pagetable page may be the target of a grant PTE update. */
+    type_info = page->u.inuse.type_info;
+    if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
+         !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
+    {
+        MEM_LOG("Grant map attempted to update a non-L1 page");
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) ||
+         !update_l1e(va, ol1e, _nl1e) )
+    {
+        put_page_type(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    /* Drop the reference that the replaced entry held. */
+    put_page_from_l1e(ol1e, d);
+
+    /* A previously-present entry may be cached in a TLB: request a flush. */
+    rc = (l1e_get_flags(ol1e) & _PAGE_PRESENT) ? GNTST_flush_all : GNTST_okay;
+
+    if ( unlikely(shadow_mode_enabled(d)) )
+    {
+        struct domain_mmap_cache sh_mapcache;
+        domain_mmap_cache_init(&sh_mapcache);
+        shadow_l1_normal_pt_update(d, pte_addr, _nl1e, &sh_mapcache);
+        domain_mmap_cache_destroy(&sh_mapcache);
+    }
+
+    put_page_type(page);
+
+ failed:
+    /* Common exit: undo the mapping and the general page reference. */
+    unmap_domain_page(va);
+    put_page(page);
+    return rc;
+}
+
+/*
+ * Remove a grant mapping by zeroing the guest L1 pagetable entry located at
+ * guest pseudo-physical address addr, provided that entry currently maps
+ * machine frame 'frame'.
+ *
+ * The frame holding the PTE is found with __gpfn_to_mfn(d, ...); a general
+ * reference and an L1-pagetable type reference are held on it while the
+ * entry is checked and cleared. If a shadow mode is enabled the shadow is
+ * updated with an empty entry.
+ *
+ * Precondition (ASSERTed): d is not in a refcounting shadow mode.
+ *
+ * Returns GNTST_okay on success, GNTST_general_error otherwise.
+ */
+int clear_grant_pte_mapping(
+    unsigned long addr, unsigned long frame, struct domain *d)
+{
+    int rc = GNTST_okay;
+    void *va;
+    unsigned long gpfn, mfn;
+    struct pfn_info *page;
+    /*
+     * Same width as the 'type' argument of get_page_type() and as the
+     * type_info local in do_mmu_update(); a u32 could drop high bits.
+     */
+    unsigned long type_info;
+    l1_pgentry_t ol1e;
+
+    ASSERT(!shadow_mode_refcounts(d));
+
+    gpfn = addr >> PAGE_SHIFT;
+    mfn = __gpfn_to_mfn(d, gpfn);
+
+    if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
+    {
+        MEM_LOG("Could not get page for normal update");
+        return GNTST_general_error;
+    }
+
+    /* Map the frame and point va at the PTE inside it. */
+    va = map_domain_page(mfn);
+    va = (void *)((unsigned long)va + (addr & ~PAGE_MASK));
+    page = pfn_to_page(mfn);
+
+    /* Only an L1 pagetable page may be the target of a grant PTE update. */
+    type_info = page->u.inuse.type_info;
+    if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
+         !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
+    {
+        MEM_LOG("Grant map attempted to update a non-L1 page");
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
+    {
+        put_page_type(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    /*
+     * Check that the entry actually maps 'frame'. Use l1e_get_pfn(), as
+     * clear_grant_va_mapping() does, so that any PTE bits stored above the
+     * frame number cannot cause a spurious mismatch.
+     */
+    if ( unlikely(l1e_get_pfn(ol1e) != frame) )
+    {
+        MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
+                (unsigned long)l1e_get_intpte(ol1e), addr, frame);
+        put_page_type(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    /* Delete pagetable entry. */
+    if ( unlikely(__put_user(0, (intpte_t *)va)))
+    {
+        MEM_LOG("Cannot delete PTE entry at %p", va);
+        put_page_type(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    if ( unlikely(shadow_mode_enabled(d)) )
+    {
+        struct domain_mmap_cache sh_mapcache;
+        domain_mmap_cache_init(&sh_mapcache);
+        shadow_l1_normal_pt_update(d, addr, l1e_empty(), &sh_mapcache);
+        domain_mmap_cache_destroy(&sh_mapcache);
+    }
+
+    put_page_type(page);
+
+ failed:
+    /* Common exit: undo the mapping and the general page reference. */
+    unmap_domain_page(va);
+    put_page(page);
+    return rc;
+}
+
+
+int update_grant_va_mapping(
+ unsigned long va, l1_pgentry_t _nl1e, struct domain *d, struct vcpu *v)
+{
+ int rc = GNTST_okay;
+ l1_pgentry_t *pl1e, ol1e;
+
+ ASSERT(spin_is_locked(&d->big_lock));
+ ASSERT(!shadow_mode_refcounts(d));
+ ASSERT((l1e_get_flags(_nl1e) & L1_DISALLOW_MASK) == 0);
+
+ /*
+ * This is actually overkill - we don't need to sync the L1 itself,
+ * just everything involved in getting to this L1 (i.e. we need
+ * linear_pg_table[l1_linear_offset(va)] to be in sync)...
*/
-
- int rc = 0;
- l1_pgentry_t *pl1e;
- l1_pgentry_t ol1e;
-
- cleanup_writable_pagetable(d);
-
- // This is actually overkill - we don't need to sync the L1 itself,
- // just everything involved in getting to this L1 (i.e. we need
- // linear_pg_table[l1_linear_offset(va)] to be in sync)...
- //
__shadow_sync_va(v, va);
pl1e = &linear_pg_table[l1_linear_offset(va)];
- if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
- rc = -EINVAL;
- else if ( !shadow_mode_refcounts(d) )
- {
- if ( update_l1e(pl1e, ol1e, _nl1e) )
- {
- put_page_from_l1e(ol1e, d);
- if ( l1e_get_flags(ol1e) & _PAGE_PRESENT )
- rc = 0; /* Caller needs to invalidate TLB entry */
- else
- rc = 1; /* Caller need not invalidate TLB entry */
- }
- else
- rc = -EINVAL;
- }
- else
- {
- printk("grant tables and shadow mode currently don't work together\n");
- BUG();
- }
+ if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
+ !update_l1e(pl1e, ol1e, _nl1e) )
+ return GNTST_general_error;
+
+ put_page_from_l1e(ol1e, d);
+
+ rc = (l1e_get_flags(ol1e) & _PAGE_PRESENT) ? GNTST_flush_one : GNTST_okay;
if ( unlikely(shadow_mode_enabled(d)) )
shadow_do_update_va_mapping(va, _nl1e, v);
return rc;
+}
+
+/*
+ * Remove a grant mapping by zeroing the L1 entry that maps virtual address
+ * addr (reached through linear_pg_table), provided that entry currently
+ * maps machine frame 'frame'.
+ *
+ * Returns GNTST_okay on success, GNTST_general_error if the entry cannot be
+ * read, does not map 'frame', or cannot be written.
+ */
+int clear_grant_va_mapping(unsigned long addr, unsigned long frame)
+{
+    l1_pgentry_t *pl1e, ol1e;
+
+    pl1e = &linear_pg_table[l1_linear_offset(addr)];
+
+    if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) )
+    {
+        MEM_LOG("Could not find PTE entry for address %lx", addr);
+        return GNTST_general_error;
+    }
+
+    /*
+     * Check that the virtual address supplied is actually mapped to
+     * frame.
+     */
+    if ( unlikely(l1e_get_pfn(ol1e) != frame) )
+    {
+        MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
+                l1e_get_pfn(ol1e), addr, frame);
+        return GNTST_general_error;
+    }
+
+    /* Delete pagetable entry. */
+    if ( unlikely(__put_user(0, &pl1e->l1)) )
+    {
+        MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
+        return GNTST_general_error;
+    }
+
+    /* Report success with the same status code family as the error paths. */
+    return GNTST_okay;
}
@@ -2289,10 +2513,11 @@
(shadow_mode_translate(d) ||
shadow_mode_translate(percpu_info[cpu].foreign))) )
{
- // The foreign domain's pfn's are in a different namespace.
- // There's not enough information in just a gpte to figure out
- // how to (re-)shadow this entry.
- //
+ /*
+ * The foreign domain's pfn's are in a different namespace. There's
+ * not enough information in just a gpte to figure out how to
+ * (re-)shadow this entry.
+ */
domain_crash();
}
@@ -2409,14 +2634,16 @@
if ( entries > FIRST_RESERVED_GDT_ENTRY )
return -EINVAL;
-
+
shadow_sync_all(d);
/* Check the pages in the new GDT. */
- for ( i = 0; i < nr_pages; i++ )
- if ( ((pfn = frames[i]) >= max_page) ||
- !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
+ for ( i = 0; i < nr_pages; i++ ) {
+ pfn = frames[i];
+ if ((pfn >= max_page) ||
+ !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
goto fail;
+ }
/* Tear down the old GDT. */
destroy_gdt(v);
@@ -2463,22 +2690,24 @@
}
-long do_update_descriptor(unsigned long pa, u64 desc)
+long do_update_descriptor(u64 pa, u64 desc)
{
struct domain *dom = current->domain;
unsigned long gpfn = pa >> PAGE_SHIFT;
unsigned long mfn;
- unsigned int offset = (pa & ~PAGE_MASK) / sizeof(struct desc_struct);
+ unsigned int offset;
struct desc_struct *gdt_pent, d;
struct pfn_info *page;
long ret = -EINVAL;
+ offset = ((unsigned int)pa & ~PAGE_MASK) / sizeof(struct desc_struct);
+
*(u64 *)&d = desc;
LOCK_BIGLOCK(dom);
if ( !VALID_MFN(mfn = __gpfn_to_mfn(dom, gpfn)) ||
- ((pa % sizeof(struct desc_struct)) != 0) ||
+ (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
(mfn >= max_page) ||
!check_descriptor(&d) )
{
@@ -2547,7 +2776,7 @@
* Writable Pagetables
*/
-#ifdef VERBOSE
+#ifdef VVERBOSE
int ptwr_debug = 0x0;
#define PTWR_PRINTK(_f, _a...) \
do { if ( unlikely(ptwr_debug) ) printk( _f , ## _a ); } while ( 0 )
@@ -2556,18 +2785,128 @@
#define PTWR_PRINTK(_f, _a...) ((void)0)
#endif
+
+#ifdef PERF_ARRAYS
+
+/**************** writeable pagetables profiling functions *****************/
+
+#define ptwr_eip_buckets 256
+
+int ptwr_eip_stat_threshold[] = {1, 10, 50, 100, L1_PAGETABLE_ENTRIES};
+
+#define ptwr_eip_stat_thresholdN (sizeof(ptwr_eip_stat_threshold)/sizeof(int))
+
+/* One slot of the writable-pagetable EIP statistics hash table. */
+typedef struct {
+    unsigned long eip;                 /* faulting EIP; 0 marks a free slot */
+    domid_t id;                        /* domain id recorded with the EIP */
+    u32 val[ptwr_eip_stat_thresholdN]; /* one counter per threshold band */
+} ptwr_eip_stat_t;
+
+ptwr_eip_stat_t ptwr_eip_stats[ptwr_eip_buckets];
+
+/*
+ * Map an (eip, domain id) pair to a starting bucket index in
+ * [0, ptwr_eip_buckets). The id is XORed with eip and with eip shifted
+ * right by 8, 16 and 24 bits, so that higher bytes of the address also
+ * influence the bucket.
+ */
+static inline unsigned int ptwr_eip_stat_hash( unsigned long eip, domid_t id )
+{
+    /* NB. the last term is a shift (>>), not a comparison (>). */
+    return (((unsigned long) id) ^ eip ^ (eip>>8) ^ (eip>>16) ^ (eip>>24)) %
+        ptwr_eip_buckets;
+}
+
+/*
+ * Increment the counter *n. If the increment wraps to zero, set the counter
+ * to its maximum value and then halve every counter in ptwr_eip_stats.
+ */
+static void ptwr_eip_stat_inc(u32 *n)
+{
+    int bucket, band;
+
+    *n += 1;
+    if ( *n != 0 )
+        return; /* common case: no wrap-around */
+
+    /* Wrapped: saturate this counter before rescaling. */
+    *n = ~0;
+
+    /* Re-scale all buckets. */
+    for ( bucket = 0; bucket < ptwr_eip_buckets; bucket++ )
+    {
+        u32 *counters = ptwr_eip_stats[bucket].val;
+
+        for ( band = 0; band < ptwr_eip_stat_thresholdN; band++ )
+            counters[band] >>= 1;
+    }
+}
+
+/*
+ * Record one writable-pagetable flush for the given faulting eip and
+ * domain id. 'modified' is the number of entries found changed by that
+ * flush; it selects which threshold band's counter is incremented.
+ *
+ * The table is probed linearly from the hashed bucket. A slot whose eip is
+ * zero is treated as free and is claimed for this (eip, id) pair. If every
+ * slot belongs to another key, the statistics are printed and reset, and
+ * this sample is not recorded.
+ */
+static void ptwr_eip_stat_update(unsigned long eip, domid_t id, int modified)
+{
+    int i, j, b;
+
+    i = b = ptwr_eip_stat_hash(eip, id);
+
+    do
+    {
+        if ( !ptwr_eip_stats[i].eip )
+        {
+            /* doesn't exist */
+            ptwr_eip_stats[i].eip = eip;
+            ptwr_eip_stats[i].id = id;
+            memset(ptwr_eip_stats[i].val,0, sizeof(ptwr_eip_stats[i].val));
+        }
+
+        /*
+         * The key is the (eip, id) pair: matching on eip alone would credit
+         * one domain's updates to another domain's slot.
+         */
+        if ( (ptwr_eip_stats[i].eip == eip) && (ptwr_eip_stats[i].id == id) )
+        {
+            /* Pick the first band whose threshold covers 'modified'. */
+            for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
+                if ( modified <= ptwr_eip_stat_threshold[j] )
+                    break;
+            /* 'modified' must not exceed the largest threshold. */
+            BUG_ON(j >= ptwr_eip_stat_thresholdN);
+            ptwr_eip_stat_inc(&ptwr_eip_stats[i].val[j]);
+            return;
+        }
+
+        i = (i+1) % ptwr_eip_buckets;
+    }
+    while ( i != b );
+
+    printk("ptwr_eip_stat: too many EIPs in use!\n");
+
+    ptwr_eip_stat_print();
+    ptwr_eip_stat_reset();
+}
+
+/* Discard all collected statistics by zeroing the whole table. */
+void ptwr_eip_stat_reset(void)
+{
+    memset(&ptwr_eip_stats[0], 0, sizeof(ptwr_eip_stats));
+}
+
+/*
+ * Print the collected statistics, grouped by domain: for each domain, one
+ * line per used slot recorded for that domain, showing the domain id, the
+ * eip and the counter of every threshold band.
+ */
+void ptwr_eip_stat_print(void)
+{
+    struct domain *e;
+    domid_t d;
+    int i, j;
+
+    for_each_domain( e )
+    {
+        d = e->domain_id;
+
+        for ( i = 0; i < ptwr_eip_buckets; i++ )
+        {
+            /* Skip free slots (eip == 0) and slots of other domains. */
+            if ( !ptwr_eip_stats[i].eip || ptwr_eip_stats[i].id != d )
+                continue;
+
+            printk("D %d eip %08lx ",
+                   ptwr_eip_stats[i].id, ptwr_eip_stats[i].eip);
+
+            for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
+                printk("<=%u %4u \t",
+                       ptwr_eip_stat_threshold[j],
+                       ptwr_eip_stats[i].val[j]);
+            printk("\n");
+        }
+    }
+}
+
+#else /* PERF_ARRAYS */
+
+#define ptwr_eip_stat_update(eip, id, modified) ((void)0)
+
+#endif
+
+/*******************************************************************/
+
/* Re-validate a given p.t. page, given its prior snapshot */
-int revalidate_l1(struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t
*snapshot)
+int revalidate_l1(
+ struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
{
l1_pgentry_t ol1e, nl1e;
int modified = 0, i;
-
-#if 0
- if ( d->domain_id )
- printk("%s: l1page mfn=%lx snapshot mfn=%lx\n", __func__,
- l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned
long)l1page)]),
- l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned
long)snapshot)]));
-#endif
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
@@ -2593,7 +2932,7 @@
if ( unlikely(!get_page_from_l1e(nl1e, d)) )
{
- MEM_LOG("ptwr: Could not re-validate l1 page\n");
+ MEM_LOG("ptwr: Could not re-validate l1 page");
/*
* Make the remaining p.t's consistent before crashing, so the
* reference counts are correct.
@@ -2614,24 +2953,34 @@
/* Flush the given writable p.t. page and write-protect it again. */
void ptwr_flush(struct domain *d, const int which)
{
- unsigned long pte, *ptep, l1va;
- l1_pgentry_t *pl1e;
+ unsigned long l1va;
+ l1_pgentry_t *pl1e, pte, *ptep;
l2_pgentry_t *pl2e;
unsigned int modified;
+#ifdef CONFIG_X86_64
+ struct vcpu *v = current;
+ extern void toggle_guest_mode(struct vcpu *);
+ int user_mode = !(v->arch.flags & TF_kernel_mode);
+#endif
+
ASSERT(!shadow_mode_enabled(d));
if ( unlikely(d->arch.ptwr[which].vcpu != current) )
- write_ptbase(d->arch.ptwr[which].vcpu);
+ /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
+ write_cr3(pagetable_get_paddr(
+ d->arch.ptwr[which].vcpu->arch.guest_table));
+ else
+ TOGGLE_MODE();
l1va = d->arch.ptwr[which].l1va;
- ptep = (unsigned long *)&linear_pg_table[l1_linear_offset(l1va)];
+ ptep = (l1_pgentry_t *)&linear_pg_table[l1_linear_offset(l1va)];
/*
* STEP 1. Write-protect the p.t. page so no more updates can occur.
*/
- if ( unlikely(__get_user(pte, ptep)) )
+ if ( unlikely(__get_user(pte.l1, &ptep->l1)) )
{
MEM_LOG("ptwr: Could not read pte at %p", ptep);
/*
@@ -2640,9 +2989,9 @@
*/
BUG();
}
- PTWR_PRINTK("[%c] disconnected_l1va at %p is %lx\n",
- PTWR_PRINT_WHICH, ptep, pte);
- pte &= ~_PAGE_RW;
+ PTWR_PRINTK("[%c] disconnected_l1va at %p is %"PRIpte"\n",
+ PTWR_PRINT_WHICH, ptep, pte.l1);
+ l1e_remove_flags(pte, _PAGE_RW);
/* Write-protect the p.t. page in the guest page table. */
if ( unlikely(__put_user(pte, ptep)) )
@@ -2658,8 +3007,8 @@
/* Ensure that there are no stale writable mappings in any TLB. */
/* NB. INVLPG is a serialising instruction: flushes pending updates. */
flush_tlb_one_mask(d->cpumask, l1va);
- PTWR_PRINTK("[%c] disconnected_l1va at %p now %lx\n",
- PTWR_PRINT_WHICH, ptep, pte);
+ PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
+ PTWR_PRINT_WHICH, ptep, pte.l1);
/*
* STEP 2. Validate any modified PTEs.
@@ -2669,6 +3018,7 @@
modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
unmap_domain_page(pl1e);
perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
+ ptwr_eip_stat_update( d->arch.ptwr[which].eip, d->domain_id, modified);
d->arch.ptwr[which].prev_nr_updates = modified;
/*
@@ -2689,6 +3039,8 @@
if ( unlikely(d->arch.ptwr[which].vcpu != current) )
write_ptbase(current);
+ else
+ TOGGLE_MODE();
}
static int ptwr_emulated_update(
@@ -2706,13 +3058,13 @@
/* Aligned access only, thank you. */
if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) )
{
- MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)\n",
+ MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)",
bytes, addr);
return X86EMUL_UNHANDLEABLE;
}
/* Turn a sub-word access into a full-word access. */
- if (bytes != sizeof(physaddr_t))
+ if ( bytes != sizeof(physaddr_t) )
{
int rc;
physaddr_t full;
@@ -2721,7 +3073,7 @@
/* Align address; read full word. */
addr &= ~(sizeof(physaddr_t)-1);
if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
- sizeof(physaddr_t))) )
+ sizeof(physaddr_t))) )
return rc;
/* Mask out bits provided by caller. */
full &= ~((((physaddr_t)1 << (bytes*8)) - 1) << (offset*8));
@@ -2729,13 +3081,17 @@
val &= (((physaddr_t)1 << (bytes*8)) - 1);
val <<= (offset)*8;
val |= full;
+ /* Also fill in missing parts of the cmpxchg old value. */
+ old &= (((physaddr_t)1 << (bytes*8)) - 1);
+ old <<= (offset)*8;
+ old |= full;
}
/* Read the PTE that maps the page being updated. */
if (__copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
sizeof(pte)))
{
- MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n");
+ MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table");
return X86EMUL_UNHANDLEABLE;
}
@@ -2747,7 +3103,8 @@
((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
(page_get_owner(page) != d) )
{
- MEM_LOG("ptwr_emulate: Page is mistyped or bad pte (%lx, %08x)\n",
+ MEM_LOG("ptwr_emulate: Page is mistyped or bad pte "
+ "(%lx, %" PRtype_info ")",
l1e_get_pfn(pte), page->u.inuse.type_info);
return X86EMUL_UNHANDLEABLE;
}
@@ -2763,7 +3120,7 @@
if ( do_cmpxchg )
{
ol1e = l1e_from_intpte(old);
- if ( cmpxchg((unsigned long *)pl1e, old, val) != old )
+ if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
{
unmap_domain_page(pl1e);
put_page_from_l1e(nl1e, d);
@@ -2821,14 +3178,15 @@
};
/* Write page fault handler: check if guest is trying to modify a PTE. */
-int ptwr_do_page_fault(struct domain *d, unsigned long addr)
+int ptwr_do_page_fault(struct domain *d, unsigned long addr,
+ struct cpu_user_regs *regs)
{
unsigned long pfn;
struct pfn_info *page;
l1_pgentry_t pte;
- l2_pgentry_t *pl2e;
+ l2_pgentry_t *pl2e, l2e;
int which;
- u32 l2_idx;
+ unsigned long l2_idx;
if ( unlikely(shadow_mode_enabled(d)) )
return 0;
@@ -2837,7 +3195,7 @@
* Attempt to read the PTE that maps the VA being accessed. By checking for
* PDE validity in the L2 we avoid many expensive fixups in __get_user().
*/
- if ( !(l2e_get_flags(__linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) &
+ if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) &
_PAGE_PRESENT) ||
__copy_from_user(&pte,&linear_pg_table[l1_linear_offset(addr)],
sizeof(pte)) )
@@ -2857,9 +3215,8 @@
return 0;
}
- /* x86/64: Writable pagetable code needs auditing. Use emulator for now. */
-#if defined(__x86_64__)
- goto emulate;
+#if 0 /* Leave this in as useful for debugging */
+ goto emulate;
#endif
/* Get the L2 index at which this L1 p.t. is always mapped. */
@@ -2868,7 +3225,7 @@
goto emulate; /* Urk! This L1 is mapped in multiple L2 slots! */
l2_idx >>= PGT_va_shift;
- if ( unlikely(l2_idx == (addr >> L2_PAGETABLE_SHIFT)) )
+ if ( unlikely(l2_idx == l2_linear_offset(addr)) )
goto emulate; /* Urk! Pagetable maps itself! */
/*
@@ -2877,7 +3234,8 @@
*/
pl2e = &__linear_l2_table[l2_idx];
which = PTWR_PT_INACTIVE;
- if ( (l2e_get_pfn(*pl2e)) == pfn )
+
+ if ( (__get_user(l2e.l2, &pl2e->l2) == 0) && (l2e_get_pfn(l2e) == pfn) )
{
/*
* Check the PRESENT bit to set ACTIVE mode.
@@ -2885,7 +3243,7 @@
* ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
* The ptwr_flush call below will restore the PRESENT bit.
*/
- if ( likely(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
+ if ( likely(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
(d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
(l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
which = PTWR_PT_ACTIVE;
@@ -2905,7 +3263,7 @@
goto emulate;
}
- PTWR_PRINTK("[%c] page_fault on l1 pt at va %lx, pt for %08x, "
+ PTWR_PRINTK("[%c] page_fault on l1 pt at va %lx, pt for %08lx, "
"pfn %lx\n", PTWR_PRINT_WHICH,
addr, l2_idx << L2_PAGETABLE_SHIFT, pfn);
@@ -2930,7 +3288,11 @@
d->arch.ptwr[which].l1va = addr | 1;
d->arch.ptwr[which].l2_idx = l2_idx;
d->arch.ptwr[which].vcpu = current;
-
+
+#ifdef PERF_ARRAYS
+ d->arch.ptwr[which].eip = regs->eip;
+#endif
+
/* For safety, disconnect the L1 p.t. page from current space. */
if ( which == PTWR_PT_ACTIVE )
{
@@ -2946,11 +3308,11 @@
/* Finally, make the p.t. page writable by the guest OS. */
l1e_add_flags(pte, _PAGE_RW);
- if ( unlikely(__copy_to_user(&linear_pg_table[addr>>PAGE_SHIFT],
- &pte, sizeof(pte))) )
+ if ( unlikely(__put_user(pte.l1,
+ &linear_pg_table[l1_linear_offset(addr)].l1)) )
{
MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
- &linear_pg_table[addr>>PAGE_SHIFT]);
+ &linear_pg_table[l1_linear_offset(addr)]);
/* Toss the writable pagetable state and crash. */
unmap_domain_page(d->arch.ptwr[which].pl1e);
d->arch.ptwr[which].l1va = 0;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/physdev.c Thu Aug 25 22:53:20 2005
@@ -106,7 +106,7 @@
(op.u.set_iobitmap.nr_ports > 65536) )
break;
ret = 0;
- current->arch.iobmp = (u8 *)op.u.set_iobitmap.bitmap;
+ current->arch.iobmp = op.u.set_iobitmap.bitmap;
current->arch.iobmp_limit = op.u.set_iobitmap.nr_ports;
break;
default:
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/setup.c Thu Aug 25 22:53:20 2005
@@ -244,15 +244,17 @@
#define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
+static struct e820entry e820_raw[E820MAX];
+
void __init __start_xen(multiboot_info_t *mbi)
{
char *cmdline;
module_t *mod = (module_t *)__va(mbi->mods_addr);
- unsigned long firsthole_start, nr_pages;
+ unsigned long nr_pages, modules_length;
unsigned long initial_images_start, initial_images_end;
unsigned long _initrd_start = 0, _initrd_len = 0;
unsigned int initrdidx = 1;
- struct e820entry e820_raw[E820MAX];
+ physaddr_t s, e;
int i, e820_raw_nr = 0, bytes = 0;
struct ns16550_defaults ns16550 = {
.data_bits = 8,
@@ -330,22 +332,30 @@
max_page = init_e820(e820_raw, &e820_raw_nr);
- /* Find the first high-memory RAM hole. */
- for ( i = 0; i < e820.nr_map; i++ )
+ modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
+
+ /* Find a large enough RAM extent to stash the DOM0 modules. */
+ for ( i = 0; ; i++ )
+ {
+ if ( i == e820.nr_map )
+ {
+ printk("Not enough memory to stash the DOM0 kernel image.\n");
+ for ( ; ; ) ;
+ }
+
if ( (e820.map[i].type == E820_RAM) &&
- (e820.map[i].addr >= 0x100000) )
+ (e820.map[i].size >= modules_length) &&
+ ((e820.map[i].addr + e820.map[i].size) >=
+ (xenheap_phys_end + modules_length)) )
break;
- firsthole_start = e820.map[i].addr + e820.map[i].size;
-
- /* Relocate the Multiboot modules. */
- initial_images_start = xenheap_phys_end;
- initial_images_end = initial_images_start +
- (mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
- if ( initial_images_end > firsthole_start )
- {
- printk("Not enough memory to stash the DOM0 kernel image.\n");
- for ( ; ; ) ;
- }
+ }
+
+ /* Stash as near as possible to the beginning of the RAM extent. */
+ initial_images_start = e820.map[i].addr;
+ if ( initial_images_start < xenheap_phys_end )
+ initial_images_start = xenheap_phys_end;
+ initial_images_end = initial_images_start + modules_length;
+
#if defined(CONFIG_X86_32)
memmove((void *)initial_images_start, /* use low mapping */
(void *)mod[0].mod_start, /* use low mapping */
@@ -358,16 +368,23 @@
/* Initialise boot-time allocator with all RAM situated after modules. */
xenheap_phys_start = init_boot_allocator(__pa(&_end));
- nr_pages = 0;
+ nr_pages = 0;
for ( i = 0; i < e820.nr_map; i++ )
{
if ( e820.map[i].type != E820_RAM )
continue;
+
nr_pages += e820.map[i].size >> PAGE_SHIFT;
- if ( (e820.map[i].addr + e820.map[i].size) >= initial_images_end )
- init_boot_pages((e820.map[i].addr < initial_images_end) ?
- initial_images_end : e820.map[i].addr,
- e820.map[i].addr + e820.map[i].size);
+
+ /* Initialise boot heap, skipping Xen heap and dom0 modules. */
+ s = e820.map[i].addr;
+ e = s + e820.map[i].size;
+ if ( s < xenheap_phys_end )
+ s = xenheap_phys_end;
+ if ( (s < initial_images_end) && (e > initial_images_start) )
+ s = initial_images_end;
+ init_boot_pages(s, e);
+
#if defined (CONFIG_X86_64)
/*
* x86/64 maps all registered RAM. Points to note:
@@ -404,10 +421,30 @@
end_boot_allocator();
- init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
- printk("Xen heap: %luMB (%lukB)\n",
- (xenheap_phys_end-xenheap_phys_start) >> 20,
- (xenheap_phys_end-xenheap_phys_start) >> 10);
+ /* Initialise the Xen heap, skipping RAM holes. */
+ nr_pages = 0;
+ for ( i = 0; i < e820.nr_map; i++ )
+ {
+ if ( e820.map[i].type != E820_RAM )
+ continue;
+
+ s = e820.map[i].addr;
+ e = s + e820.map[i].size;
+ if ( s < xenheap_phys_start )
+ s = xenheap_phys_start;
+ if ( e > xenheap_phys_end )
+ e = xenheap_phys_end;
+
+ if ( s < e )
+ {
+ nr_pages += (e - s) >> PAGE_SHIFT;
+ init_xenheap_pages(s, e);
+ }
+ }
+
+ printk("Xen heap: %luMB (%lukB)\n",
+ nr_pages >> (20 - PAGE_SHIFT),
+ nr_pages << (PAGE_SHIFT - 10));
early_boot = 0;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/shadow.c Thu Aug 25 22:53:20 2005
@@ -1578,7 +1578,7 @@
if ( unlikely(!VALID_MFN(gmfn)) )
{
- SH_LOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
+ SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
*spte_p = l1e_empty();
return 0;
}
@@ -1612,7 +1612,7 @@
if ( unlikely(!VALID_MFN(mfn)) )
{
- SH_LOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
+ SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
*spte_p = l1e_empty();
return 0;
}
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/shadow32.c Thu Aug 25 22:53:20 2005
@@ -418,7 +418,7 @@
break;
default:
- printk("Free shadow weird page type mfn=%lx type=%08x\n",
+ printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
page_to_pfn(page), page->u.inuse.type_info);
break;
}
@@ -665,7 +665,7 @@
shadow_audit(d, 0);
- SH_LOG("Free shadow table.");
+ SH_VLOG("Free shadow table.");
}
void shadow_mode_init(void)
@@ -1137,7 +1137,7 @@
d->arch.shadow_ht_free = NULL;
ASSERT(d->arch.shadow_extras_count == 0);
- SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
+ SH_VLOG("freed extras, now %d", d->arch.shadow_extras_count);
if ( d->arch.shadow_dirty_bitmap != NULL )
{
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/shadow_public.c Thu Aug 25 22:53:20 2005
@@ -571,7 +571,7 @@
break;
default:
- printk("Free shadow weird page type mfn=%lx type=%08x\n",
+ printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
page_to_pfn(page), page->u.inuse.type_info);
break;
}
@@ -1638,14 +1638,14 @@
/* XXX This needs more thought... */
printk("%s: needing to call __shadow_remove_all_access for mfn=%lx\n",
__func__, page_to_pfn(page));
- printk("Before: mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+ printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
page->count_info, page->u.inuse.type_info);
shadow_lock(d);
__shadow_remove_all_access(d, page_to_pfn(page));
shadow_unlock(d);
- printk("After: mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+ printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
page->count_info, page->u.inuse.type_info);
}
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/smpboot.c Thu Aug 25 22:53:20 2005
@@ -434,7 +434,6 @@
unsigned int cpu = cpucount;
extern void percpu_traps_init(void);
- extern void cpu_init(void);
set_current(idle_task[cpu]);
set_processor_id(cpu);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/time.c Thu Aug 25 22:53:20 2005
@@ -43,7 +43,8 @@
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
int timer_ack = 0;
unsigned long volatile jiffies;
-static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
+static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
+static spinlock_t wc_lock = SPIN_LOCK_UNLOCKED;
struct time_scale {
int shift;
@@ -67,13 +68,6 @@
static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
static u64 (*read_platform_count)(void);
-static inline u32 down_shift(u64 time, int shift)
-{
- if ( shift < 0 )
- return (u32)(time >> -shift);
- return (u32)((u32)time << shift);
-}
-
/*
* 32-bit division of integer dividend and integer divisor yielding
* 32-bit fractional quotient.
@@ -83,7 +77,7 @@
u32 quotient, remainder;
ASSERT(dividend < divisor);
__asm__ (
- "div %4"
+ "divl %4"
: "=a" (quotient), "=d" (remainder)
: "0" (0), "1" (dividend), "r" (divisor) );
return quotient;
@@ -101,6 +95,42 @@
: "=a" (product_frac), "=d" (product_int)
: "0" (multiplicand), "r" (multiplier) );
return product_int;
+}
+
+/*
+ * Scale a 64-bit delta: shift it by scale->shift (right if negative), then
+ * multiply by the 32-bit fraction scale->mul_frac, i.e. (delta*mul_frac)>>32.
+ */
+static inline u64 scale_delta(u64 delta, struct time_scale *scale)
+{
+ u64 product;
+#ifdef CONFIG_X86_32
+ u32 tmp1, tmp2;
+#endif
+
+ if ( scale->shift < 0 )
+ delta >>= -scale->shift;
+ else
+ delta <<= scale->shift;
+
+#ifdef CONFIG_X86_32
+ __asm__ (
+ "mul %5 ; " /* edx:eax = lo32(delta) * mul_frac */
+ "mov %4,%%eax ; " /* eax = hi32(delta) */
+ "mov %%edx,%4 ; " /* %4 = high word of the low product (carry-over) */
+ "mul %5 ; " /* edx:eax = hi32(delta) * mul_frac */
+ "xor %5,%5 ; " /* zero %5 BEFORE the add: xor clears CF */
+ "add %4,%%eax ; " /* add carry-over into the low word; sets CF */
+ "adc %5,%%edx ; " /* propagate CF into the high word */
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (scale->mul_frac) );
+#else
+ __asm__ (
+ "mul %%rdx ; shrd $32,%%rdx,%%rax"
+ : "=a" (product) : "0" (delta), "d" ((u64)scale->mul_frac) );
+#endif
+
+ return product;
}
void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
@@ -486,11 +516,9 @@
static s_time_t __read_platform_stime(u64 platform_time)
{
- u64 diff64 = platform_time - platform_timer_stamp;
- u32 diff = down_shift(diff64, platform_timer_scale.shift);
+ u64 diff = platform_time - platform_timer_stamp;
ASSERT(spin_is_locked(&platform_timer_lock));
- return (stime_platform_stamp +
- (u64)mul_frac(diff, platform_timer_scale.mul_frac));
+ return (stime_platform_stamp + scale_delta(diff, &platform_timer_scale));
}
static s_time_t read_platform_stime(void)
@@ -619,15 +647,27 @@
s_time_t get_s_time(void)
{
struct cpu_time *t = &cpu_time[smp_processor_id()];
- u64 tsc;
- u32 delta;
+ u64 tsc, delta;
s_time_t now;
rdtscll(tsc);
- delta = down_shift(tsc - t->local_tsc_stamp, t->tsc_scale.shift);
- now = t->stime_local_stamp + (u64)mul_frac(delta, t->tsc_scale.mul_frac);
+ delta = tsc - t->local_tsc_stamp;
+ now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
return now;
+}
+
+static inline void version_update_begin(u32 *version)
+{
+ /* Explicitly OR with 1 just in case version number gets out of sync. */
+ *version = (*version + 1) | 1; /* bump the counter, forcing the result to be odd */
+ wmb(); /* write barrier: version store is issued before the caller's following stores */
+}
+
+static inline void version_update_end(u32 *version)
+{
+ wmb(); /* write barrier: the caller's preceding stores are issued before the version bump */
+ (*version)++; /* version_update_begin() left it odd, so this makes it even again */
}
static inline void __update_dom_time(struct vcpu *v)
@@ -635,20 +675,14 @@
struct cpu_time *t = &cpu_time[smp_processor_id()];
struct vcpu_time_info *u = &v->domain->shared_info->vcpu_time[v->vcpu_id];
- u->time_version1++;
- wmb();
+ version_update_begin(&u->version);
u->tsc_timestamp = t->local_tsc_stamp;
u->system_time = t->stime_local_stamp;
u->tsc_to_system_mul = t->tsc_scale.mul_frac;
u->tsc_shift = (s8)t->tsc_scale.shift;
- wmb();
- u->time_version2++;
-
- /* Should only do this during do_settime(). */
- v->domain->shared_info->wc_sec = wc_sec;
- v->domain->shared_info->wc_usec = wc_usec;
+ version_update_end(&u->version);
}
void update_dom_time(struct vcpu *v)
@@ -659,21 +693,43 @@
}
/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
-void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
-{
- u64 x, base_usecs;
- u32 y;
-
- base_usecs = system_time_base;
- do_div(base_usecs, 1000);
-
- x = (secs * 1000000ULL) + (u64)usecs + base_usecs;
- y = do_div(x, 1000000);
-
- wc_sec = (unsigned long)x;
- wc_usec = (unsigned long)y;
-
- __update_dom_time(current);
+void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
+{
+ u64 x;
+ u32 y, _wc_sec, _wc_nsec;
+ struct domain *d;
+ shared_info_t *s;
+
+ x = (secs * 1000000000ULL) + (u64)nsecs - system_time_base; /* requested time in ns, less system_time_base */
+ y = do_div(x, 1000000000); /* x becomes whole seconds, y the nanosecond remainder */
+
+ read_lock(&domlist_lock);
+ spin_lock(&wc_lock);
+
+ wc_sec = _wc_sec = (u32)x; /* globals are updated under wc_lock so that ... */
+ wc_nsec = _wc_nsec = (u32)y; /* ... init_domain_time() never reads a torn sec/nsec pair */
+
+ for_each_domain ( d )
+ {
+ s = d->shared_info;
+ version_update_begin(&s->wc_version);
+ s->wc_sec = _wc_sec;
+ s->wc_nsec = _wc_nsec;
+ version_update_end(&s->wc_version);
+ }
+
+ spin_unlock(&wc_lock);
+ read_unlock(&domlist_lock);
+}
+
+void init_domain_time(struct domain *d)
+{
+ spin_lock(&wc_lock); /* same lock do_settime() holds while publishing to every domain */
+ version_update_begin(&d->shared_info->wc_version);
+ d->shared_info->wc_sec = wc_sec; /* copy the current global wallclock seconds ... */
+ d->shared_info->wc_nsec = wc_nsec; /* ... and nanoseconds into this domain's shared_info */
+ version_update_end(&d->shared_info->wc_version);
+ spin_unlock(&wc_lock);
}
static void local_time_calibration(void *unused)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/traps.c Thu Aug 25 22:53:20 2005
@@ -159,10 +159,8 @@
addr = *stack++;
if ( is_kernel_text(addr) )
{
- if ( (i != 0) && ((i % 6) == 0) )
- printk("\n ");
printk("[<%p>]", _p(addr));
- print_symbol(" %s\n", addr);
+ print_symbol(" %s\n ", addr);
i++;
}
}
@@ -422,7 +420,7 @@
{
LOCK_BIGLOCK(d);
if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
- unlikely((addr >> L2_PAGETABLE_SHIFT) ==
+ unlikely(l2_linear_offset(addr) ==
d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
{
ptwr_flush(d, PTWR_PT_ACTIVE);
@@ -430,10 +428,15 @@
return EXCRET_fault_fixed;
}
- if ( (addr < HYPERVISOR_VIRT_START) &&
+ if ( ((addr < HYPERVISOR_VIRT_START)
+#if defined(__x86_64__)
+ || (addr >= HYPERVISOR_VIRT_END)
+#endif
+ )
+ &&
KERNEL_MODE(v, regs) &&
((regs->error_code & 3) == 3) && /* write-protection fault */
- ptwr_do_page_fault(d, addr) )
+ ptwr_do_page_fault(d, addr, regs) )
{
UNLOCK_BIGLOCK(d);
return EXCRET_fault_fixed;
@@ -459,15 +462,13 @@
goto xen_fault;
propagate_page_fault(addr, regs->error_code);
- return 0;
+ return 0;
xen_fault:
if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
{
perfc_incrc(copy_user_faults);
- if ( !shadow_mode_enabled(d) )
- DPRINTK("Page fault: %p -> %p\n", _p(regs->eip), _p(fixup));
regs->eip = fixup;
return 0;
}
@@ -1155,7 +1156,6 @@
void __init trap_init(void)
{
extern void percpu_traps_init(void);
- extern void cpu_init(void);
/*
* Note that interrupt gates are always used, rather than trap gates. We
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/vmx.c Thu Aug 25 22:53:20 2005
@@ -65,7 +65,7 @@
* are not modified once set for generic domains, we don't save them,
* but simply reset them to the values set at percpu_traps_init().
*/
-void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
+void vmx_load_msrs(struct vcpu *n)
{
struct msr_state *host_state;
host_state = &percpu_msr[smp_processor_id()];
@@ -1712,9 +1712,6 @@
default:
__vmx_bug(®s); /* should not happen */
}
-
- vmx_intr_assist(v);
- return;
}
asmlinkage void load_cr2(void)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/vmx_intercept.c Thu Aug 25 22:53:20 2005
@@ -74,10 +74,10 @@
static void pit_cal_count(struct vmx_virpit_t *vpit)
{
- unsigned int usec_delta = (unsigned int)((NOW() - vpit->inject_point) /
1000);
- if (usec_delta > vpit->period * 1000)
+ u64 nsec_delta = (u64)(NOW() - vpit->inject_point); /* ns since inject_point; no 32-bit truncation (would wrap at ~4.29s) */
+ if (nsec_delta > vpit->period)
VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT:long time has passed from last
injection!");
- vpit->count = vpit->init_val - ((usec_delta * PIT_FREQ / 1000000) %
vpit->init_val );
+ vpit->count = vpit->init_val - ((nsec_delta * PIT_FREQ / 1000000000ULL) %
vpit->init_val );
}
static void pit_latch_io(struct vmx_virpit_t *vpit)
@@ -197,9 +197,10 @@
static void pit_timer_fn(void *data)
{
struct vmx_virpit_t *vpit = data;
- int missed_ticks;
-
- missed_ticks = (NOW() - vpit->scheduled) / MILLISECS(vpit->period);
+ s_time_t next;
+ int missed_ticks;
+
+ missed_ticks = (NOW() - vpit->scheduled)/(s_time_t) vpit->period;
/* Set the pending intr bit, and send evtchn notification to myself. */
if (test_and_set_bit(vpit->vector, vpit->intr_bitmap))
@@ -208,12 +209,12 @@
/* pick up missed timer tick */
if ( missed_ticks > 0 ) {
vpit->pending_intr_nr += missed_ticks;
- vpit->scheduled += missed_ticks * MILLISECS(vpit->period);
- }
- vpit->scheduled += MILLISECS(vpit->period);
- set_ac_timer(&vpit->pit_timer, vpit->scheduled);
-}
-
+ vpit->scheduled += missed_ticks * vpit->period;
+ }
+ next = vpit->scheduled + vpit->period;
+ set_ac_timer(&vpit->pit_timer, next);
+ vpit->scheduled = next;
+}
/* Only some PIT operations such as load init counter need a hypervisor hook.
* leave all other operations in user space DM
@@ -236,16 +237,17 @@
reinit = 1;
}
else
- init_ac_timer(&vpit->pit_timer, pit_timer_fn, vpit, 0);
+ init_ac_timer(&vpit->pit_timer, pit_timer_fn, vpit, d->processor);
/* init count for this channel */
vpit->init_val = (p->u.data & 0xFFFF) ;
- /* frequency(ms) of pit */
- vpit->period = DIV_ROUND(((vpit->init_val) * 1000), PIT_FREQ);
- if (vpit->period < 1) {
+ /* frequency(ns) of pit */
+ vpit->period = DIV_ROUND(((vpit->init_val) * 1000000000ULL),
PIT_FREQ);
+ VMX_DBG_LOG(DBG_LEVEL_1,"VMX_PIT: guest set init pit freq:%u ns,
initval:0x%x\n", vpit->period, vpit->init_val);
+ if (vpit->period < 900000) { /* < 0.9 ms */
printk("VMX_PIT: guest programmed too small an init_val: %x\n",
vpit->init_val);
- vpit->period = 1;
+ vpit->period = 1000000;
}
vpit->vector = ((p->u.data >> 16) & 0xFF);
vpit->channel = ((p->u.data >> 24) & 0x3);
@@ -272,7 +274,7 @@
vpit->intr_bitmap = intr;
- vpit->scheduled = NOW() + MILLISECS(vpit->period);
+ vpit->scheduled = NOW() + vpit->period;
set_ac_timer(&vpit->pit_timer, vpit->scheduled);
/*restore the state*/
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/vmx_io.c Thu Aug 25 22:53:20 2005
@@ -631,12 +631,14 @@
return ((eflags & X86_EFLAGS_IF) == 0);
}
-void vmx_intr_assist(struct vcpu *v)
+asmlinkage void vmx_intr_assist(void)
{
int intr_type = 0;
- int highest_vector = find_highest_pending_irq(v, &intr_type);
+ int highest_vector;
unsigned long intr_fields, eflags, interruptibility, cpu_exec_control;
-
+ struct vcpu *v = current;
+
+ highest_vector = find_highest_pending_irq(v, &intr_type);
__vmread(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);
if (highest_vector == -1) {
@@ -712,9 +714,6 @@
/* We can't resume the guest if we're waiting on I/O */
ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags));
-
- /* We always check for interrupts before resuming guest */
- vmx_intr_assist(d);
}
#endif /* CONFIG_VMX */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/vmx_vmcs.c Thu Aug 25 22:53:20 2005
@@ -187,46 +187,52 @@
return 0;
}
-void vmx_do_launch(struct vcpu *v)
-{
-/* Update CR3, GDT, LDT, TR */
+void vmx_set_host_env(struct vcpu *v)
+{
unsigned int tr, cpu, error = 0;
struct host_execution_env host_env;
struct Xgt_desc_struct desc;
- unsigned long pfn = 0;
- struct pfn_info *page;
- struct cpu_user_regs *regs = guest_cpu_user_regs();
-
- vmx_stts();
cpu = smp_processor_id();
-
- page = (struct pfn_info *) alloc_domheap_page(NULL);
- pfn = (unsigned long) (page - frame_table);
-
- vmx_setup_platform(v, regs);
-
__asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory");
host_env.idtr_limit = desc.size;
host_env.idtr_base = desc.address;
error |= __vmwrite(HOST_IDTR_BASE, host_env.idtr_base);
-
+
__asm__ __volatile__ ("sgdt (%0) \n" :: "a"(&desc) : "memory");
host_env.gdtr_limit = desc.size;
host_env.gdtr_base = desc.address;
error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);
+ __asm__ __volatile__ ("str (%0) \n" :: "a"(&tr) : "memory");
+ host_env.tr_selector = tr;
+ host_env.tr_limit = sizeof(struct tss_struct);
+ host_env.tr_base = (unsigned long) &init_tss[cpu];
+ error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
+ error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
+}
+
+void vmx_do_launch(struct vcpu *v)
+{
+/* Update CR3, GDT, LDT, TR */
+ unsigned int error = 0;
+ unsigned long pfn = 0;
+ struct pfn_info *page;
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+
+ vmx_stts();
+
+ page = (struct pfn_info *) alloc_domheap_page(NULL);
+ pfn = (unsigned long) (page - frame_table);
+
+ vmx_setup_platform(v, regs);
+
+ vmx_set_host_env(v);
+
error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
error |= __vmwrite(GUEST_LDTR_BASE, 0);
error |= __vmwrite(GUEST_LDTR_LIMIT, 0);
- __asm__ __volatile__ ("str (%0) \n" :: "a"(&tr) : "memory");
- host_env.tr_selector = tr;
- host_env.tr_limit = sizeof(struct tss_struct);
- host_env.tr_base = (unsigned long) &init_tss[cpu];
-
- error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
- error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
error |= __vmwrite(GUEST_TR_BASE, 0);
error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
@@ -523,12 +529,48 @@
void vm_launch_fail(unsigned long eflags)
{
+ unsigned long error;
+ __vmread(VM_INSTRUCTION_ERROR, &error);
+ printk("<vm_launch_fail> error code %lx\n", error);
__vmx_bug(guest_cpu_user_regs());
}
void vm_resume_fail(unsigned long eflags)
{
+ unsigned long error;
+ __vmread(VM_INSTRUCTION_ERROR, &error);
+ printk("<vm_resume_fail> error code %lx\n", error);
__vmx_bug(guest_cpu_user_regs());
+}
+
+void arch_vmx_do_resume(struct vcpu *v)
+{
+ u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
+
+ load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
+ vmx_do_resume(v);
+ reset_stack_and_jump(vmx_asm_do_resume);
+}
+
+void arch_vmx_do_launch(struct vcpu *v)
+{
+ u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
+
+ load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
+ vmx_do_launch(v);
+ reset_stack_and_jump(vmx_asm_do_launch);
+}
+
+void arch_vmx_do_relaunch(struct vcpu *v)
+{
+ u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
+
+ load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
+ vmx_do_resume(v);
+ vmx_set_host_env(v);
+ v->arch.schedule_tail = arch_vmx_do_resume;
+
+ reset_stack_and_jump(vmx_asm_do_relaunch);
}
#endif /* CONFIG_VMX */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_32/entry.S Thu Aug 25 22:53:20 2005
@@ -108,31 +108,26 @@
pushl %ecx; \
pushl %ebx;
+#define VMX_RESTORE_ALL_NOSEGREGS \
+ popl %ebx; \
+ popl %ecx; \
+ popl %edx; \
+ popl %esi; \
+ popl %edi; \
+ popl %ebp; \
+ popl %eax; \
+ addl $(NR_SKIPPED_REGS*4), %esp
+
ENTRY(vmx_asm_vmexit_handler)
/* selectors are restored/saved by VMX */
VMX_SAVE_ALL_NOSEGREGS
call vmx_vmexit_handler
jmp vmx_asm_do_resume
-ENTRY(vmx_asm_do_launch)
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- addl $(NR_SKIPPED_REGS*4), %esp
- /* VMLUANCH */
- .byte 0x0f,0x01,0xc2
- pushf
- call vm_launch_fail
- hlt
-
- ALIGN
-
-ENTRY(vmx_asm_do_resume)
-vmx_test_all_events:
+.macro vmx_asm_common launch initialized
+1:
+/* vmx_test_all_events */
+ .if \initialized
GET_CURRENT(%ebx)
/*test_all_events:*/
xorl %ecx,%ecx
@@ -142,34 +137,51 @@
movl VCPU_processor(%ebx),%eax
shl $IRQSTAT_shift,%eax
test %ecx,irq_stat(%eax,1)
- jnz vmx_process_softirqs
-
-vmx_restore_all_guest:
+ jnz 2f
+
+/* vmx_restore_all_guest */
+ call vmx_intr_assist
call load_cr2
+ .endif
+ VMX_RESTORE_ALL_NOSEGREGS
/*
* Check if we are going back to VMX-based VM
* By this time, all the setups in the VMCS must be complete.
*/
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- addl $(NR_SKIPPED_REGS*4), %esp
+ .if \launch
+ /* VMLAUNCH */
+ .byte 0x0f,0x01,0xc2
+ pushf
+ call vm_launch_fail
+ .else
/* VMRESUME */
.byte 0x0f,0x01,0xc3
pushf
call vm_resume_fail
+ .endif
/* Should never reach here */
hlt
ALIGN
-vmx_process_softirqs:
+ .if \initialized
+2:
+/* vmx_process_softirqs */
sti
call do_softirq
- jmp vmx_test_all_events
+ jmp 1b
+ ALIGN
+ .endif
+.endm
+
+ENTRY(vmx_asm_do_launch)
+ vmx_asm_common 1 0
+
+ENTRY(vmx_asm_do_resume)
+ vmx_asm_common 0 1
+
+ENTRY(vmx_asm_do_relaunch)
+ vmx_asm_common 1 1
+
#endif
ALIGN
@@ -335,7 +347,8 @@
movl VCPU_vcpu_info(%ebx),%eax
pushl VCPUINFO_upcall_mask(%eax)
testb $TBF_INTERRUPT,%cl
- setnz VCPUINFO_upcall_mask(%eax) # TBF_INTERRUPT -> clear upcall mask
+ setnz %ch # TBF_INTERRUPT -> set upcall mask
+ orb %ch,VCPUINFO_upcall_mask(%eax)
popl %eax
shll $16,%eax # Bits 16-23: saved_upcall_mask
movw UREGS_cs+4(%esp),%ax # Bits 0-15: CS
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_32/mm.c Thu Aug 25 22:53:20 2005
@@ -93,13 +93,10 @@
/*
* Allocate and map the machine-to-phys table and create read-only mapping
- * of MPT for guest-OS use. Without PAE we'll end up with one 4MB page,
- * with PAE we'll allocate 2MB pages depending on the amount of memory
- * installed, but at least 4MB to cover 4GB address space. This is needed
- * to make PCI I/O memory address lookups work in guests.
+ * of MPT for guest-OS use.
*/
- if ( (mpt_size = max_page * 4) < (4*1024*1024) )
- mpt_size = 4*1024*1024;
+ mpt_size = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL;
+ mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
{
if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
@@ -148,7 +145,7 @@
void subarch_init_memory(struct domain *dom_xen)
{
unsigned long m2p_start_mfn;
- int i;
+ unsigned int i, j;
/*
* We are rather picky about the layout of 'struct pfn_info'. The
@@ -172,12 +169,12 @@
{
m2p_start_mfn = l2e_get_pfn(
idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
{
- frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
+ frame_table[m2p_start_mfn+j].count_info = PGC_allocated | 1;
/* Ensure it's only mapped read-only by domains. */
- frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
- page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
+ frame_table[m2p_start_mfn+j].u.inuse.type_info = PGT_gdt_page | 1;
+ page_set_owner(&frame_table[m2p_start_mfn+j], dom_xen);
}
}
}
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_32/traps.c Thu Aug 25 22:53:20 2005
@@ -1,5 +1,6 @@
#include <xen/config.h>
+#include <xen/domain_page.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
@@ -66,8 +67,9 @@
printk("CPU: %d\nEIP: %04lx:[<%08lx>]",
smp_processor_id(), (unsigned long)0xffff & regs->cs, eip);
- print_symbol(" %s\n", eip);
- printk("EFLAGS: %08lx CONTEXT: %s\n", eflags, context);
+ if ( !GUEST_MODE(regs) )
+ print_symbol(" %s", eip);
+ printk("\nEFLAGS: %08lx CONTEXT: %s\n", eflags, context);
printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
regs->eax, regs->ebx, regs->ecx, regs->edx);
printk("esi: %08x edi: %08x ebp: %08x esp: %08lx\n",
@@ -85,24 +87,33 @@
void show_page_walk(unsigned long addr)
{
- l2_pgentry_t pmd;
- l1_pgentry_t *pte;
-
- if ( addr < PAGE_OFFSET )
- return;
+ unsigned long pfn = read_cr3() >> PAGE_SHIFT; /* start the walk at the frame CR3 points to */
+ intpte_t *ptab, ent;
printk("Pagetable walk from %08lx:\n", addr);
-
- pmd = idle_pg_table_l2[l2_linear_offset(addr)];
- printk(" L2 = %"PRIpte" %s\n", l2e_get_intpte(pmd),
- (l2e_get_flags(pmd) & _PAGE_PSE) ? "(2/4MB)" : "");
- if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) ||
- (l2e_get_flags(pmd) & _PAGE_PSE) )
- return;
-
- pte = __va(l2e_get_paddr(pmd));
- pte += l1_table_offset(addr);
- printk(" L1 = %"PRIpte"\n", l1e_get_intpte(*pte));
+
+#ifdef CONFIG_X86_PAE
+ ptab = map_domain_page(pfn);
+ ent = ptab[l3_table_offset(addr)];
+ printk(" L3 = %"PRIpte"\n", ent);
+ unmap_domain_page(ptab);
+ if ( !(ent & _PAGE_PRESENT) )
+ return;
+ pfn = ent >> PAGE_SHIFT;
+#endif
+
+ ptab = map_domain_page(pfn);
+ ent = ptab[l2_table_offset(addr)];
+ printk(" L2 = %"PRIpte" %s\n", ent, (ent & _PAGE_PSE) ? "(PSE)" : "");
+ unmap_domain_page(ptab);
+ if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) ) /* stop: not present, or a superpage with no L1 below it */
+ return;
+ pfn = ent >> PAGE_SHIFT;
+
+ ptab = map_domain_page(pfn); /* map the L1 table named by the L2 entry */
+ ent = ptab[l1_table_offset(addr)]; /* index with the L1 offset, not the L2 offset */
+ printk(" L1 = %"PRIpte"\n", ent);
+ unmap_domain_page(ptab);
}
#define DOUBLEFAULT_STACK_SIZE 1024
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_64/entry.S Thu Aug 25 22:53:20 2005
@@ -194,39 +194,34 @@
pushq %r14; \
pushq %r15; \
+#define VMX_RESTORE_ALL_NOSEGREGS \
+ popq %r15; \
+ popq %r14; \
+ popq %r13; \
+ popq %r12; \
+ popq %rbp; \
+ popq %rbx; \
+ popq %r11; \
+ popq %r10; \
+ popq %r9; \
+ popq %r8; \
+ popq %rax; \
+ popq %rcx; \
+ popq %rdx; \
+ popq %rsi; \
+ popq %rdi; \
+ addq $(NR_SKIPPED_REGS*8), %rsp; \
+
ENTRY(vmx_asm_vmexit_handler)
/* selectors are restored/saved by VMX */
VMX_SAVE_ALL_NOSEGREGS
call vmx_vmexit_handler
jmp vmx_asm_do_resume
-ENTRY(vmx_asm_do_launch)
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbp
- popq %rbx
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rcx
- popq %rdx
- popq %rsi
- popq %rdi
- addq $(NR_SKIPPED_REGS*8), %rsp
- /* VMLUANCH */
- .byte 0x0f,0x01,0xc2
- pushfq
- call vm_launch_fail
- hlt
-
- ALIGN
-
-ENTRY(vmx_asm_do_resume)
-vmx_test_all_events:
+.macro vmx_asm_common launch initialized
+1:
+ .if \initialized
+/* vmx_test_all_events */
GET_CURRENT(%rbx)
/* test_all_events: */
cli # tests must not race interrupts
@@ -235,42 +230,52 @@
shl $IRQSTAT_shift,%rax
leaq irq_stat(%rip), %rdx
testl $~0,(%rdx,%rax,1)
- jnz vmx_process_softirqs
-
-vmx_restore_all_guest:
+ jnz 2f
+
+/* vmx_restore_all_guest */
+ call vmx_intr_assist
call load_cr2
+ .endif
/*
* Check if we are going back to VMX-based VM
* By this time, all the setups in the VMCS must be complete.
*/
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbp
- popq %rbx
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rcx
- popq %rdx
- popq %rsi
- popq %rdi
- addq $(NR_SKIPPED_REGS*8), %rsp
+ VMX_RESTORE_ALL_NOSEGREGS
+ .if \launch
+ /* VMLAUNCH */
+ .byte 0x0f,0x01,0xc2
+ pushfq
+ call vm_launch_fail
+ .else
/* VMRESUME */
.byte 0x0f,0x01,0xc3
pushfq
call vm_resume_fail
+ .endif
/* Should never reach here */
hlt
ALIGN
-vmx_process_softirqs:
+
+ .if \initialized
+2:
+/* vmx_process_softirqs */
sti
call do_softirq
- jmp vmx_test_all_events
+ jmp 1b
+ ALIGN
+ .endif
+.endm
+
+ENTRY(vmx_asm_do_launch)
+ vmx_asm_common 1 0
+
+ENTRY(vmx_asm_do_resume)
+ vmx_asm_common 0 1
+
+ENTRY(vmx_asm_do_relaunch)
+ vmx_asm_common 1 1
+
#endif
ALIGN
@@ -314,7 +319,8 @@
movq VCPU_vcpu_info(%rbx),%rax
pushq VCPUINFO_upcall_mask(%rax)
testb $TBF_INTERRUPT,%cl
- setnz VCPUINFO_upcall_mask(%rax)# TBF_INTERRUPT -> clear upcall mask
+ setnz %ch # TBF_INTERRUPT -> set upcall mask
+ orb %ch,VCPUINFO_upcall_mask(%rax)
popq %rax
shlq $32,%rax # Bits 32-39: saved_upcall_mask
movw UREGS_cs+8(%rsp),%ax # Bits 0-15: CS
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_64/mm.c Thu Aug 25 22:53:20 2005
@@ -74,7 +74,7 @@
void __init paging_init(void)
{
- unsigned long i;
+ unsigned long i, mpt_size;
l3_pgentry_t *l3_ro_mpt;
l2_pgentry_t *l2_ro_mpt;
struct pfn_info *pg;
@@ -98,16 +98,17 @@
* Allocate and map the machine-to-phys table.
* This also ensures L3 is present for fixmaps.
*/
- for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
- {
- pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0);
- if ( pg == NULL )
+ mpt_size = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL;
+ mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+ for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
+ {
+ if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
panic("Not enough memory for m2p table\n");
map_pages_to_xen(
- RDWR_MPT_VIRT_START + i*8, page_to_pfn(pg),
+ RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), page_to_pfn(pg),
1UL << PAGETABLE_ORDER,
PAGE_HYPERVISOR);
- memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55,
+ memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
1UL << L2_PAGETABLE_SHIFT);
*l2_ro_mpt++ = l2e_from_page(
pg, _PAGE_GLOBAL|_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_64/traps.c Thu Aug 25 22:53:20 2005
@@ -17,8 +17,9 @@
{
printk("CPU: %d\nEIP: %04x:[<%016lx>]",
smp_processor_id(), 0xffff & regs->cs, regs->rip);
- print_symbol(" %s\n", regs->rip);
- printk("EFLAGS: %016lx\n", regs->eflags);
+ if ( !GUEST_MODE(regs) )
+ print_symbol(" %s", regs->rip);
+ printk("\nEFLAGS: %016lx\n", regs->eflags);
printk("rax: %016lx rbx: %016lx rcx: %016lx rdx: %016lx\n",
regs->rax, regs->rbx, regs->rcx, regs->rdx);
printk("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c Wed Aug 24 02:43:18 2005
+++ b/xen/common/dom0_ops.c Thu Aug 25 22:53:20 2005
@@ -70,8 +70,7 @@
flags &= ~DOMFLAGS_BLOCKED;
if ( v->vcpu_flags & VCPUF_running )
flags |= DOMFLAGS_RUNNING;
- if ( v->cpu_time > cpu_time )
- cpu_time += v->cpu_time;
+ cpu_time += v->cpu_time;
vcpu_count++;
}
@@ -294,17 +293,17 @@
v->cpumap = cpumap;
if ( cpumap == CPUMAP_RUNANYWHERE )
+ {
clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
+ }
else
{
/* pick a new cpu from the usable map */
int new_cpu = (int)find_first_set_bit(cpumap) % num_online_cpus();
vcpu_pause(v);
- if ( v->processor != new_cpu )
- set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+ vcpu_migrate_cpu(v, new_cpu);
set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
- v->processor = new_cpu;
vcpu_unpause(v);
}
@@ -475,7 +474,7 @@
case DOM0_SETTIME:
{
do_settime(op->u.settime.secs,
- op->u.settime.usecs,
+ op->u.settime.nsecs,
op->u.settime.system_time);
ret = 0;
}
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/event_channel.c
--- a/xen/common/event_channel.c Wed Aug 24 02:43:18 2005
+++ b/xen/common/event_channel.c Thu Aug 25 22:53:20 2005
@@ -588,7 +588,6 @@
long rc = 0;
if ( (vcpu >= MAX_VIRT_CPUS) || (d->vcpu[vcpu] == NULL) ) {
- printf("vcpu %d bad.\n", vcpu);
return -EINVAL;
}
@@ -596,7 +595,6 @@
if ( !port_is_valid(d, port) )
{
- printf("port %d bad.\n", port);
rc = -EINVAL;
goto out;
}
@@ -610,7 +608,6 @@
chn->notify_vcpu_id = vcpu;
break;
default:
- printf("evtchn type %d can't be rebound.\n", chn->state);
rc = -EINVAL;
break;
}
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/grant_table.c
--- a/xen/common/grant_table.c Wed Aug 24 02:43:18 2005
+++ b/xen/common/grant_table.c Thu Aug 25 22:53:20 2005
@@ -6,6 +6,8 @@
*
* Copyright (c) 2005 Christopher Clark
* Copyright (c) 2004 K A Fraser
+ * Copyright (c) 2005 Andrew Warfield
+ * Modifications by Geoffrey Lefebvre are (c) Intel Research Cambridge
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -50,7 +52,7 @@
grant_table_t *t)
{
unsigned int h;
- if ( unlikely((h = t->maptrack_head) == t->maptrack_limit) )
+ if ( unlikely((h = t->maptrack_head) == (t->maptrack_limit - 1)) )
return -1;
t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT;
t->map_count++;
@@ -68,13 +70,13 @@
static int
__gnttab_activate_grant_ref(
- struct domain *mapping_d, /* IN */
+ struct domain *mapping_d, /* IN */
struct vcpu *mapping_ed,
- struct domain *granting_d,
- grant_ref_t ref,
- u16 dev_hst_ro_flags,
- unsigned long host_virt_addr,
- unsigned long *pframe ) /* OUT */
+ struct domain *granting_d,
+ grant_ref_t ref,
+ u16 dev_hst_ro_flags,
+ u64 addr,
+ unsigned long *pframe ) /* OUT */
{
domid_t sdom;
u16 sflags;
@@ -95,7 +97,7 @@
* Returns:
* . -ve: error
* . 1: ok
- * . 0: ok and TLB invalidate of host_virt_addr needed.
+ * . 0: ok and TLB invalidate of host_addr needed.
*
* On success, *pframe contains mfn.
*/
@@ -121,6 +123,10 @@
sflags = sha->flags;
sdom = sha->domid;
+ /* This loop attempts to set the access (reading/writing) flags
+ * in the grant table entry. It tries a cmpxchg on the field
+ * up to five times, and then fails under the assumption that
+ * the guest is misbehaving. */
for ( ; ; )
{
u32 scombo, prev_scombo, new_scombo;
@@ -253,28 +259,32 @@
/*
* At this point:
- * act->pin updated to reflect mapping.
+ * act->pin updated to reference count mappings.
* sha->flags updated to indicate to granting domain mapping done.
* frame contains the mfn.
*/
spin_unlock(&granting_d->grant_table->lock);
- if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
+ if ( (addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
{
/* Write update into the pagetable. */
l1_pgentry_t pte;
pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
+
+ if ( (dev_hst_ro_flags & GNTMAP_application_map) )
+ l1e_add_flags(pte,_PAGE_USER);
if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
l1e_add_flags(pte,_PAGE_RW);
- rc = update_grant_va_mapping( host_virt_addr, pte,
- mapping_d, mapping_ed );
-
- /*
- * IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB.
- * This is done in the outer gnttab_map_grant_ref.
- */
-
+
+ if ( dev_hst_ro_flags & GNTMAP_contains_pte )
+ rc = update_grant_pte_mapping(addr, pte, mapping_d, mapping_ed);
+ else
+ rc = update_grant_va_mapping(addr, pte, mapping_d, mapping_ed);
+
+ /* IMPORTANT: rc indicates the degree of TLB flush that is required.
+ * GNTST_flush_one (1) or GNTST_flush_all (2). This is done in the
+ * outer gnttab_map_grant_ref. */
if ( rc < 0 )
{
/* Failure: undo and abort. */
@@ -317,20 +327,24 @@
/*
* Returns 0 if TLB flush / invalidate required by caller.
* va will indicate the address to be invalidated.
+ *
+ * addr is _either_ a host virtual address, or the address of the pte to
+ * update, as indicated by the GNTMAP_contains_pte flag.
*/
static int
__gnttab_map_grant_ref(
gnttab_map_grant_ref_t *uop,
unsigned long *va)
{
- domid_t dom;
- grant_ref_t ref;
- struct domain *ld, *rd;
+ domid_t dom;
+ grant_ref_t ref;
+ struct domain *ld, *rd;
struct vcpu *led;
- u16 dev_hst_ro_flags;
- int handle;
- unsigned long frame = 0, host_virt_addr;
- int rc;
+ u16 dev_hst_ro_flags;
+ int handle;
+ u64 addr;
+ unsigned long frame = 0;
+ int rc;
led = current;
ld = led->domain;
@@ -338,19 +352,20 @@
/* Bitwise-OR avoids short-circuiting which screws control flow. */
if ( unlikely(__get_user(dom, &uop->dom) |
__get_user(ref, &uop->ref) |
- __get_user(host_virt_addr, &uop->host_virt_addr) |
+ __get_user(addr, &uop->host_addr) |
__get_user(dev_hst_ro_flags, &uop->flags)) )
{
DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
return -EFAULT; /* don't set status */
}
-
- if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map)) &&
- unlikely(!__addr_ok(host_virt_addr)))
- {
- DPRINTK("Bad virtual address (%lx) or flags (%x).\n",
- host_virt_addr, dev_hst_ro_flags);
+ if ( (dev_hst_ro_flags & GNTMAP_host_map) &&
+ ( (addr == 0) ||
+ (!(dev_hst_ro_flags & GNTMAP_contains_pte) &&
+ unlikely(!__addr_ok(addr))) ) )
+ {
+ DPRINTK("Bad virtual address (%"PRIx64") or flags (%"PRIx16").\n",
+ addr, dev_hst_ro_flags);
(void)__put_user(GNTST_bad_virt_addr, &uop->handle);
return GNTST_bad_gntref;
}
@@ -386,12 +401,20 @@
grant_mapping_t *new_mt;
grant_table_t *lgt = ld->grant_table;
+ if ( (lgt->maptrack_limit << 1) > MAPTRACK_MAX_ENTRIES )
+ {
+ put_domain(rd);
+ DPRINTK("Maptrack table is at maximum size.\n");
+ (void)__put_user(GNTST_no_device_space, &uop->handle);
+ return GNTST_no_device_space;
+ }
+
/* Grow the maptrack table. */
new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1);
if ( new_mt == NULL )
{
put_domain(rd);
- DPRINTK("No more map handles available\n");
+ DPRINTK("No more map handles available.\n");
(void)__put_user(GNTST_no_device_space, &uop->handle);
return GNTST_no_device_space;
}
@@ -405,7 +428,7 @@
lgt->maptrack_order += 1;
lgt->maptrack_limit <<= 1;
- printk("Doubled maptrack size\n");
+ DPRINTK("Doubled maptrack size\n");
handle = get_maptrack_handle(ld->grant_table);
}
@@ -416,7 +439,7 @@
if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
dev_hst_ro_flags,
- host_virt_addr, &frame)))
+ addr, &frame)))
{
/*
* Only make the maptrack live _after_ writing the pte, in case we
@@ -428,10 +451,11 @@
= (ref << MAPTRACK_REF_SHIFT) |
(dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);
- (void)__put_user(frame, &uop->dev_bus_addr);
-
- if ( dev_hst_ro_flags & GNTMAP_host_map )
- *va = host_virt_addr;
+ (void)__put_user((u64)frame << PAGE_SHIFT, &uop->dev_bus_addr);
+
+ if ( ( dev_hst_ro_flags & GNTMAP_host_map ) &&
+ !( dev_hst_ro_flags & GNTMAP_contains_pte) )
+ *va = addr;
(void)__put_user(handle, &uop->handle);
}
@@ -449,12 +473,12 @@
gnttab_map_grant_ref(
gnttab_map_grant_ref_t *uop, unsigned int count)
{
- int i, flush = 0;
+ int i, rc, flush = 0;
unsigned long va = 0;
for ( i = 0; i < count; i++ )
- if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 )
- flush++;
+ if ( (rc =__gnttab_map_grant_ref(&uop[i], &va)) >= 0 )
+ flush += rc;
if ( flush == 1 )
flush_tlb_one_mask(current->domain->cpumask, va);
@@ -469,28 +493,30 @@
gnttab_unmap_grant_ref_t *uop,
unsigned long *va)
{
- domid_t dom;
- grant_ref_t ref;
- u16 handle;
- struct domain *ld, *rd;
-
+ domid_t dom;
+ grant_ref_t ref;
+ u16 handle;
+ struct domain *ld, *rd;
active_grant_entry_t *act;
- grant_entry_t *sha;
+ grant_entry_t *sha;
grant_mapping_t *map;
- u16 flags;
- s16 rc = 1;
- unsigned long frame, virt;
+ u16 flags;
+ s16 rc = 1;
+ u64 addr, dev_bus_addr;
+ unsigned long frame;
ld = current->domain;
/* Bitwise-OR avoids short-circuiting which screws control flow. */
- if ( unlikely(__get_user(virt, &uop->host_virt_addr) |
- __get_user(frame, &uop->dev_bus_addr) |
+ if ( unlikely(__get_user(addr, &uop->host_addr) |
+ __get_user(dev_bus_addr, &uop->dev_bus_addr) |
__get_user(handle, &uop->handle)) )
{
DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
return -EFAULT; /* don't set status */
}
+
+ frame = (unsigned long)(dev_bus_addr >> PAGE_SHIFT);
map = &ld->grant_table->maptrack[handle];
@@ -529,15 +555,6 @@
if ( frame == 0 )
{
frame = act->frame;
- }
- else if ( frame == GNTUNMAP_DEV_FROM_VIRT )
- {
- if ( !( flags & GNTMAP_device_map ) )
- PIN_FAIL(unmap_out, GNTST_bad_dev_addr,
- "Bad frame number: frame not mapped for dev access.\n");
- frame = act->frame;
-
- /* Frame will be unmapped for device access below if virt addr okay. */
}
else
{
@@ -554,41 +571,19 @@
/* Frame is now unmapped for device access. */
}
- if ( (virt != 0) &&
+ if ( (addr != 0) &&
(flags & GNTMAP_host_map) &&
((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
{
- l1_pgentry_t *pl1e;
- unsigned long _ol1e;
-
- pl1e = &linear_pg_table[l1_linear_offset(virt)];
-
- if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
- {
- DPRINTK("Could not find PTE entry for address %lx\n", virt);
- rc = -EINVAL;
- goto unmap_out;
- }
-
- /*
- * Check that the virtual address supplied is actually mapped to
- * act->frame.
- */
- if ( unlikely((_ol1e >> PAGE_SHIFT) != frame ))
- {
- DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
- _ol1e, virt, frame);
- rc = -EINVAL;
- goto unmap_out;
- }
-
- /* Delete pagetable entry. */
- if ( unlikely(__put_user(0, (unsigned long *)pl1e)))
- {
- DPRINTK("Cannot delete PTE entry at %p for virtual address %lx\n",
- pl1e, virt);
- rc = -EINVAL;
- goto unmap_out;
+ if ( flags & GNTMAP_contains_pte )
+ {
+ if ( (rc = clear_grant_pte_mapping(addr, frame, ld)) < 0 )
+ goto unmap_out;
+ }
+ else
+ {
+ if ( (rc = clear_grant_va_mapping(addr, frame)) < 0 )
+ goto unmap_out;
}
map->ref_and_flags &= ~GNTMAP_host_map;
@@ -596,17 +591,9 @@
act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
: GNTPIN_hstw_inc;
- if ( frame == GNTUNMAP_DEV_FROM_VIRT )
- {
- act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
- : GNTPIN_devw_inc;
-
- map->ref_and_flags &= ~GNTMAP_device_map;
- (void)__put_user(0, &uop->dev_bus_addr);
- }
-
rc = 0;
- *va = virt;
+ if ( !( flags & GNTMAP_contains_pte) )
+ *va = addr;
}
if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
@@ -630,6 +617,7 @@
if ( act->pin == 0 )
{
+ act->frame = 0xdeadbeef;
clear_bit(_GTF_reading, &sha->flags);
put_page(&frame_table[frame]);
}
@@ -768,7 +756,7 @@
if ( sha_copy.flags )
{
DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) "
- "dom:(%hu) frame:(%lx)\n",
+ "dom:(%hu) frame:(%x)\n",
op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
}
}
@@ -822,18 +810,20 @@
for (i = 0; i < count; i++) {
gnttab_donate_t *gop = &uop[i];
#if GRANT_DEBUG
- printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+ printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n",
i, gop->mfn, gop->domid, gop->handle);
#endif
page = &frame_table[gop->mfn];
-
+
if (unlikely(IS_XEN_HEAP_FRAME(page))) {
- printk("gnttab_donate: xen heap frame mfn=%lx\n", (unsigned long) gop->mfn);
+ printk("gnttab_donate: xen heap frame mfn=%lx\n",
+ (unsigned long) gop->mfn);
gop->status = GNTST_bad_virt_addr;
continue;
}
if (unlikely(!pfn_valid(page_to_pfn(page)))) {
- printk("gnttab_donate: invalid pfn for mfn=%lx\n", (unsigned long) gop->mfn);
+ printk("gnttab_donate: invalid pfn for mfn=%lx\n",
+ (unsigned long) gop->mfn);
gop->status = GNTST_bad_virt_addr;
continue;
}
@@ -859,7 +849,8 @@
if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
(1 | PGC_allocated)) || unlikely(_nd != _d)) {
printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
- " caf=%08x, taf=%08x\n", (void *) page_to_pfn(page),
+ " caf=%08x, taf=%" PRtype_info "\n",
+ (void *) page_to_pfn(page),
d, d->domain_id, unpickle_domptr(_nd), x,
page->u.inuse.type_info);
spin_unlock(&d->page_alloc_lock);
@@ -918,9 +909,9 @@
if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
unlikely(e->tot_pages == e->max_pages) ||
unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
- printk("gnttab_donate: Transferee has no reservation headroom (%d,%d), or "
- "provided a bad grant ref (%08x), or is dying (%p).\n",
- e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+ printk("gnttab_donate: Transferee has no reservation headroom (%d,"
+ "%d) or provided a bad grant ref (%08x) or is dying (%p)\n",
+ e->tot_pages, e->max_pages, gop->handle, e->d_flags);
spin_unlock(&e->page_alloc_lock);
put_domain(e);
result = GNTST_general_error;
@@ -933,9 +924,9 @@
}
list_add_tail(&page->list, &e->page_list);
page_set_owner(page, e);
-
+
spin_unlock(&e->page_alloc_lock);
-
+
/*
* Transfer is all done: tell the guest about its new page
* frame.
@@ -943,7 +934,7 @@
gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
put_domain(e);
-
+
gop->status = GNTST_okay;
}
return result;
@@ -954,48 +945,53 @@
unsigned int cmd, void *uop, unsigned int count)
{
long rc;
-
+ struct domain *d = current->domain;
+
if ( count > 512 )
return -EINVAL;
-
- LOCK_BIGLOCK(current->domain);
-
+
+ LOCK_BIGLOCK(d);
+
+ sync_pagetable_state(d);
+
rc = -EFAULT;
switch ( cmd )
- {
- case GNTTABOP_map_grant_ref:
- if ( unlikely(!array_access_ok(
- uop, count, sizeof(gnttab_map_grant_ref_t))) )
- goto out;
- rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
- break;
- case GNTTABOP_unmap_grant_ref:
- if ( unlikely(!array_access_ok(
- uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
- goto out;
- rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count);
- break;
- case GNTTABOP_setup_table:
- rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
- break;
+ {
+ case GNTTABOP_map_grant_ref:
+ if ( unlikely(!array_access_ok(
+ uop, count, sizeof(gnttab_map_grant_ref_t))) )
+ goto out;
+ rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
+ break;
+ case GNTTABOP_unmap_grant_ref:
+ if ( unlikely(!array_access_ok(
+ uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
+ goto out;
+ rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop,
+ count);
+ break;
+ case GNTTABOP_setup_table:
+ rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
+ break;
#if GRANT_DEBUG
- case GNTTABOP_dump_table:
- rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
- break;
+ case GNTTABOP_dump_table:
+ rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
+ break;
#endif
- case GNTTABOP_donate:
- if (unlikely(!array_access_ok(uop, count, sizeof(gnttab_donate_t))))
- goto out;
- rc = gnttab_donate(uop, count);
- break;
- default:
- rc = -ENOSYS;
- break;
- }
-
-out:
- UNLOCK_BIGLOCK(current->domain);
-
+ case GNTTABOP_donate:
+ if (unlikely(!array_access_ok(uop, count,
+ sizeof(gnttab_donate_t))))
+ goto out;
+ rc = gnttab_donate(uop, count);
+ break;
+ default:
+ rc = -ENOSYS;
+ break;
+ }
+
+ out:
+ UNLOCK_BIGLOCK(d);
+
return rc;
}
@@ -1009,106 +1005,101 @@
* Called a _lot_ at domain creation because pages mapped by priv domains
* also traverse this.
*/
-
+
/* Note: If the same frame is mapped multiple times, and then one of
* the ptes is overwritten, which maptrack handle gets invalidated?
* Advice: Don't do it. Explicitly unmap.
*/
-
+
unsigned int handle, ref, refcount;
grant_table_t *lgt, *rgt;
active_grant_entry_t *act;
grant_mapping_t *map;
int found = 0;
-
+
lgt = ld->grant_table;
-
+
#if GRANT_DEBUG_VERBOSE
- if ( ld->domain_id != 0 )
- {
- DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n",
- rd->domain_id, ld->domain_id, frame, readonly);
- }
+ if ( ld->domain_id != 0 ) {
+ DPRINTK("Foreign unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
+ rd->domain_id, ld->domain_id, frame, readonly);
+ }
#endif
-
+
/* Fast exit if we're not mapping anything using grant tables */
if ( lgt->map_count == 0 )
return 0;
-
- if ( get_domain(rd) == 0 )
- {
+
+ if ( get_domain(rd) == 0 ) {
DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n",
rd->domain_id);
return 0;
}
-
+
rgt = rd->grant_table;
-
- for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
- {
+
+ for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) {
+
map = &lgt->maptrack[handle];
-
+
if ( map->domid != rd->domain_id )
continue;
-
+
if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) &&
- ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly))))
- {
+ ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly)))) {
+
ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT);
act = &rgt->active[ref];
-
+
spin_lock(&rgt->lock);
-
- if ( act->frame != frame )
- {
+
+ if ( act->frame != frame ) {
spin_unlock(&rgt->lock);
continue;
}
-
+
refcount = act->pin & ( readonly ? GNTPIN_hstr_mask
- : GNTPIN_hstw_mask );
- if ( refcount == 0 )
- {
+ : GNTPIN_hstw_mask );
+
+ if ( refcount == 0 ) {
spin_unlock(&rgt->lock);
continue;
}
-
+
/* gotcha */
DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
rd->domain_id, ld->domain_id, frame, readonly);
-
+
if ( readonly )
act->pin -= GNTPIN_hstr_inc;
- else
- {
+ else {
act->pin -= GNTPIN_hstw_inc;
-
+
/* any more granted writable mappings? */
- if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
- {
+ if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) {
clear_bit(_GTF_writing, &rgt->shared[ref].flags);
put_page_type(&frame_table[frame]);
}
}
-
- if ( act->pin == 0 )
- {
+
+ if ( act->pin == 0 ) {
clear_bit(_GTF_reading, &rgt->shared[ref].flags);
put_page(&frame_table[frame]);
}
+
spin_unlock(&rgt->lock);
-
+
clear_bit(GNTMAP_host_map, &map->ref_and_flags);
-
+
if ( !(map->ref_and_flags & GNTMAP_device_map) )
put_maptrack_handle(lgt, handle);
-
+
found = 1;
break;
}
}
put_domain(rd);
-
+
return found;
}
@@ -1124,8 +1115,10 @@
int retries = 0;
unsigned long target_pfn;
+#if GRANT_DEBUG_VERBOSE
DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
rd->domain_id, ld->domain_id, ref);
+#endif
if ( unlikely((rgt = rd->grant_table) == NULL) ||
unlikely(ref >= NR_GRANT_ENTRIES) )
@@ -1203,8 +1196,10 @@
grant_entry_t *sha;
unsigned long pfn;
+#if GRANT_DEBUG_VERBOSE
DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
rd->domain_id, ld->domain_id, ref);
+#endif
sha = &rd->grant_table->shared[ref];
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/lib.c
--- a/xen/common/lib.c Wed Aug 24 02:43:18 2005
+++ b/xen/common/lib.c Thu Aug 25 22:53:20 2005
@@ -450,8 +450,10 @@
ret <<= 10;
case 'M': case 'm':
ret <<= 10;
- case 'K': case 'k':
+ case 'K': case 'k': default:
ret <<= 10;
+ case 'B': case 'b':
+ break;
}
return ret;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Wed Aug 24 02:43:18 2005
+++ b/xen/common/page_alloc.c Thu Aug 25 22:53:20 2005
@@ -52,7 +52,6 @@
* One bit per page of memory. Bit set => page is allocated.
*/
-static unsigned long bitmap_size; /* in bytes */
static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
@@ -135,10 +134,16 @@
/* Initialise allocator to handle up to @max_page pages. */
physaddr_t init_boot_allocator(physaddr_t bitmap_start)
{
+ unsigned long bitmap_size;
+
bitmap_start = round_pgup(bitmap_start);
- /* Allocate space for the allocation bitmap. */
+ /*
+ * Allocate space for the allocation bitmap. Include an extra longword
+ * of padding for possible overrun in map_alloc and map_free.
+ */
bitmap_size = max_page / 8;
+ bitmap_size += sizeof(unsigned long);
bitmap_size = round_pgup(bitmap_size);
alloc_bitmap = (unsigned long *)phys_to_virt(bitmap_start);
@@ -171,7 +176,7 @@
else if ( *p != '\0' )
break;
- if ( (bad_pfn < (bitmap_size*8)) && !allocated_in_map(bad_pfn) )
+ if ( (bad_pfn < max_page) && !allocated_in_map(bad_pfn) )
{
printk("Marking page %lx as bad\n", bad_pfn);
map_alloc(bad_pfn, 1);
@@ -183,7 +188,7 @@
{
unsigned long pg, i;
- for ( pg = 0; (pg + nr_pfns) < (bitmap_size*8); pg += pfn_align )
+ for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align )
{
for ( i = 0; i < nr_pfns; i++ )
if ( allocated_in_map(pg + i) )
@@ -362,7 +367,7 @@
printk("Scrubbing Free RAM: ");
- for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
+ for ( pfn = 0; pfn < max_page; pfn++ )
{
/* Every 100MB, print a progress dot. */
if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
@@ -413,6 +418,8 @@
ps = round_pgup(ps);
pe = round_pgdown(pe);
+ if ( pe <= ps )
+ return;
memguard_guard_range(phys_to_virt(ps), pe - ps);
@@ -482,19 +489,25 @@
ps = round_pgup(ps) >> PAGE_SHIFT;
pe = round_pgdown(pe) >> PAGE_SHIFT;
-
- if (ps < MAX_DMADOM_PFN && pe > MAX_DMADOM_PFN) {
- init_heap_pages(MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
- init_heap_pages(MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN),
- pe - MAX_DMADOM_PFN);
+ if ( pe <= ps )
+ return;
+
+ if ( (ps < MAX_DMADOM_PFN) && (pe > MAX_DMADOM_PFN) )
+ {
+ init_heap_pages(
+ MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
+ init_heap_pages(
+ MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN), pe - MAX_DMADOM_PFN);
}
else
+ {
init_heap_pages(pfn_dom_zone_type(ps), pfn_to_page(ps), pe - ps);
-}
-
-
-struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order,
- unsigned int flags)
+ }
+}
+
+
+struct pfn_info *alloc_domheap_pages(
+ struct domain *d, unsigned int order, unsigned int flags)
{
struct pfn_info *pg;
cpumask_t mask;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/perfc.c
--- a/xen/common/perfc.c Wed Aug 24 02:43:18 2005
+++ b/xen/common/perfc.c Thu Aug 25 22:53:20 2005
@@ -7,6 +7,7 @@
#include <xen/spinlock.h>
#include <public/dom0_ops.h>
#include <asm/uaccess.h>
+#include <xen/mm.h>
#undef PERFCOUNTER
#undef PERFCOUNTER_CPU
@@ -81,6 +82,10 @@
}
printk("\n");
}
+
+#ifdef PERF_ARRAYS
+ ptwr_eip_stat_print();
+#endif
}
void perfc_reset(unsigned char key)
@@ -118,6 +123,10 @@
break;
}
}
+
+#ifdef PERF_ARRAYS
+ ptwr_eip_stat_reset();
+#endif
}
static dom0_perfc_desc_t perfc_d[NR_PERFCTRS];
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/schedule.c
--- a/xen/common/schedule.c Wed Aug 24 02:43:18 2005
+++ b/xen/common/schedule.c Thu Aug 25 22:53:20 2005
@@ -38,6 +38,8 @@
#include <xen/mm.h>
#include <public/sched_ctl.h>
+extern void arch_getdomaininfo_ctxt(struct vcpu *,
+ struct vcpu_guest_context *);
/* opt_sched: scheduler - default to SEDF */
static char opt_sched[10] = "sedf";
string_param("sched", opt_sched);
@@ -82,7 +84,8 @@
int i;
SCHED_OP(free_task, d);
- for (i = 0; i < MAX_VIRT_CPUS; i++)
+ /* vcpu 0 has to be the last one destructed. */
+ for (i = MAX_VIRT_CPUS-1; i >= 0; i--)
if ( d->vcpu[i] )
arch_free_vcpu_struct(d->vcpu[i]);
@@ -295,10 +298,36 @@
return 0;
}
+static long do_vcpu_pickle(int vcpu, unsigned long arg)
+{
+ struct vcpu *v;
+ vcpu_guest_context_t *c;
+ int ret = 0;
+
+ if (vcpu >= MAX_VIRT_CPUS)
+ return -EINVAL;
+ v = current->domain->vcpu[vcpu];
+ if (!v)
+ return -ESRCH;
+ /* Don't pickle vcpus which are currently running */
+ if (!test_bit(_VCPUF_down, &v->vcpu_flags)) {
+ return -EBUSY;
+ }
+ c = xmalloc(vcpu_guest_context_t);
+ if (!c)
+ return -ENOMEM;
+ arch_getdomaininfo_ctxt(v, c);
+ if (copy_to_user((vcpu_guest_context_t *)arg,
+ (const vcpu_guest_context_t *)c, sizeof(*c)))
+ ret = -EFAULT;
+ xfree(c);
+ return ret;
+}
+
/*
* Demultiplex scheduler-related hypercalls.
*/
-long do_sched_op(unsigned long op)
+long do_sched_op(unsigned long op, unsigned long arg)
{
long ret = 0;
@@ -332,6 +361,11 @@
case SCHEDOP_vcpu_up:
{
ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift));
+ break;
+ }
+ case SCHEDOP_vcpu_pickle:
+ {
+ ret = do_vcpu_pickle((int)(op >> SCHEDOP_vcpushift), arg);
break;
}
@@ -474,13 +508,14 @@
set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
- /* Must be protected by the schedule_lock! */
+ if ( unlikely(prev == next) )
+ {
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+ return continue_running(prev);
+ }
+
+ clear_bit(_VCPUF_running, &prev->vcpu_flags);
set_bit(_VCPUF_running, &next->vcpu_flags);
-
- spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
- if ( unlikely(prev == next) )
- return continue_running(prev);
perfc_incrc(sched_ctx);
@@ -517,6 +552,10 @@
next->domain->domain_id, next->vcpu_id);
context_switch(prev, next);
+
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+ context_switch_finalise(next);
}
/* No locking needed -- pointer comparison is safe :-) */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/trace.c
--- a/xen/common/trace.c Wed Aug 24 02:43:18 2005
+++ b/xen/common/trace.c Thu Aug 25 22:53:20 2005
@@ -113,10 +113,10 @@
switch ( tbc->op)
{
case DOM0_TBUF_GET_INFO:
- tbc->cpu_mask = tb_cpu_mask;
- tbc->evt_mask = tb_event_mask;
- tbc->mach_addr = __pa(t_bufs[0]);
- tbc->size = opt_tbuf_size * PAGE_SIZE;
+ tbc->cpu_mask = tb_cpu_mask;
+ tbc->evt_mask = tb_event_mask;
+ tbc->buffer_mfn = __pa(t_bufs[0]) >> PAGE_SHIFT;
+ tbc->size = opt_tbuf_size * PAGE_SIZE;
break;
case DOM0_TBUF_SET_CPU_MASK:
tb_cpu_mask = tbc->cpu_mask;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Wed Aug 24 02:43:18 2005
+++ b/xen/drivers/char/console.c Thu Aug 25 22:53:20 2005
@@ -652,8 +652,9 @@
void panic(const char *fmt, ...)
{
va_list args;
- char buf[128], cpustr[10];
+ char buf[128];
unsigned long flags;
+ static spinlock_t lock = SPIN_LOCK_UNLOCKED;
extern void machine_restart(char *);
debugtrace_dump();
@@ -665,16 +666,13 @@
debugger_trap_immediate();
/* Spit out multiline message in one go. */
- spin_lock_irqsave(&console_lock, flags);
- __putstr("\n****************************************\n");
- __putstr("Panic on CPU");
- sprintf(cpustr, "%d", smp_processor_id());
- __putstr(cpustr);
- __putstr(":\n");
- __putstr(buf);
- __putstr("****************************************\n\n");
- __putstr("Reboot in five seconds...\n");
- spin_unlock_irqrestore(&console_lock, flags);
+ spin_lock_irqsave(&lock, flags);
+ printk("\n****************************************\n");
+ printk("Panic on CPU %d:\n", smp_processor_id());
+ printk(buf);
+ printk("****************************************\n\n");
+ printk("Reboot in five seconds...\n");
+ spin_unlock_irqrestore(&lock, flags);
watchdog_disable();
mdelay(5000);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/drivers/char/ns16550.c
--- a/xen/drivers/char/ns16550.c Wed Aug 24 02:43:18 2005
+++ b/xen/drivers/char/ns16550.c Thu Aug 25 22:53:20 2005
@@ -15,7 +15,12 @@
#include <xen/serial.h>
#include <asm/io.h>
-/* Config serial port with a string <baud>,DPS,<io-base>,<irq>. */
+/*
+ * Configure serial port with a string <baud>,DPS,<io-base>,<irq>.
+ * The tail of the string can be omitted if platform defaults are sufficient.
+ * If the baud rate is pre-configured, perhaps by a bootloader, then 'auto'
+ * can be specified in place of a numeric baud rate.
+ */
static char opt_com1[30] = "", opt_com2[30] = "";
string_param("com1", opt_com1);
string_param("com2", opt_com2);
@@ -154,7 +159,7 @@
ns_write_reg(uart, IER, 0);
/* Line control and baud-rate generator. */
- if ( uart->baud != 0 )
+ if ( uart->baud != BAUD_AUTO )
{
ns_write_reg(uart, LCR, lcr | LCR_DLAB);
ns_write_reg(uart, DLL, 115200/uart->baud); /* baud lo */
@@ -244,38 +249,50 @@
{
int baud;
+ /* No user-specified configuration? */
if ( (conf == NULL) || (*conf == '\0') )
- goto config_parsed;
-
- if ( (baud = simple_strtol(conf, &conf, 10)) != 0 )
+ {
+ /* Some platforms may automatically probe the UART configuration. */
+ if ( uart->baud != 0 )
+ goto config_parsed;
+ return;
+ }
+
+ if ( strncmp(conf, "auto", 4) == 0 )
+ {
+ uart->baud = BAUD_AUTO;
+ conf += 4;
+ }
+ else if ( (baud = simple_strtoul(conf, &conf, 10)) != 0 )
uart->baud = baud;
if ( *conf != ',' )
goto config_parsed;
conf++;
- uart->data_bits = simple_strtol(conf, &conf, 10);
+ uart->data_bits = simple_strtoul(conf, &conf, 10);
uart->parity = parse_parity_char(*conf);
conf++;
- uart->stop_bits = simple_strtol(conf, &conf, 10);
+ uart->stop_bits = simple_strtoul(conf, &conf, 10);
if ( *conf == ',' )
{
conf++;
- uart->io_base = simple_strtol(conf, &conf, 0);
+ uart->io_base = simple_strtoul(conf, &conf, 0);
if ( *conf == ',' )
{
conf++;
- uart->irq = simple_strtol(conf, &conf, 10);
+ uart->irq = simple_strtoul(conf, &conf, 10);
}
}
config_parsed:
/* Sanity checks. */
- if ( (uart->baud != 0) && ((uart->baud < 1200) || (uart->baud > 115200)) )
+ if ( (uart->baud != BAUD_AUTO) &&
+ ((uart->baud < 1200) || (uart->baud > 115200)) )
PARSE_ERR("Baud rate %d outside supported range.", uart->baud);
if ( (uart->data_bits < 5) || (uart->data_bits > 8) )
PARSE_ERR("%d data bits are unsupported.", uart->data_bits);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/e820.h
--- a/xen/include/asm-x86/e820.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/e820.h Thu Aug 25 22:53:20 2005
@@ -3,7 +3,7 @@
#include <asm/page.h>
-#define E820MAX 32
+#define E820MAX 128
#define E820_RAM 1
#define E820_RESERVED 2
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/event.h Thu Aug 25 22:53:20 2005
@@ -11,6 +11,19 @@
static inline void evtchn_notify(struct vcpu *v)
{
+ /*
+ * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
+ * pending flag. These values may fluctuate (after all, we hold no
+ * locks) but the key insight is that each change will cause
+ * evtchn_upcall_pending to be polled.
+ *
+ * NB2. We save VCPUF_running across the unblock to avoid a needless
+ * IPI for domains that we IPI'd to unblock.
+ */
+ int running = test_bit(_VCPUF_running, &v->vcpu_flags);
+ vcpu_unblock(v);
+ if ( running )
+ smp_send_event_check_cpu(v->processor);
}
#endif
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/io.h
--- a/xen/include/asm-x86/io.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/io.h Thu Aug 25 22:53:20 2005
@@ -2,6 +2,7 @@
#define _ASM_IO_H
#include <xen/config.h>
+#include <xen/types.h>
#include <asm/page.h>
#define IO_SPACE_LIMIT 0xffff
@@ -45,11 +46,7 @@
/*
* Change "struct pfn_info" to physical address.
*/
-#ifdef CONFIG_HIGHMEM64G
-#define page_to_phys(page) ((u64)(page - frame_table) << PAGE_SHIFT)
-#else
-#define page_to_phys(page) ((page - frame_table) << PAGE_SHIFT)
-#endif
+#define page_to_phys(page) ((physaddr_t)(page - frame_table) << PAGE_SHIFT)
#define page_to_pfn(_page) ((unsigned long)((_page) - frame_table))
#define page_to_virt(_page) phys_to_virt(page_to_phys(_page))
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/mm.h Thu Aug 25 22:53:20 2005
@@ -36,7 +36,7 @@
/* Owner of this page (NULL if page is anonymous). */
u32 _domain; /* pickled format */
/* Type reference count and various PGT_xxx flags and fields. */
- u32 type_info;
+ unsigned long type_info;
} inuse;
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
@@ -77,6 +77,7 @@
/* Owning guest has pinned this page to its current type? */
#define _PGT_pinned 27
#define PGT_pinned (1U<<_PGT_pinned)
+#if defined(__i386__)
/* The 11 most significant bits of virt address if this is a page table. */
#define PGT_va_shift 16
#define PGT_va_mask (((1U<<11)-1)<<PGT_va_shift)
@@ -84,6 +85,16 @@
#define PGT_va_mutable (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
#define PGT_va_unknown (((1U<<11)-2)<<PGT_va_shift)
+#elif defined(__x86_64__)
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift 32
+#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
+#endif
+
/* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask ((1U<<16)-1)
@@ -114,11 +125,13 @@
#if defined(__i386__)
#define pickle_domptr(_d) ((u32)(unsigned long)(_d))
#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d))
+#define PRtype_info "08lx" /* should only be used for printk's */
#elif defined(__x86_64__)
static inline struct domain *unpickle_domptr(u32 _domain)
{ return (_domain == 0) ? NULL : __va(_domain); }
static inline u32 pickle_domptr(struct domain *domain)
{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
+#define PRtype_info "016lx"/* should only be used for printk's */
#endif
#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
@@ -144,8 +157,8 @@
extern unsigned long max_page;
void init_frametable(void);
-int alloc_page_type(struct pfn_info *page, unsigned int type);
-void free_page_type(struct pfn_info *page, unsigned int type);
+int alloc_page_type(struct pfn_info *page, unsigned long type);
+void free_page_type(struct pfn_info *page, unsigned long type);
extern void invalidate_shadow_ldt(struct vcpu *d);
extern int shadow_remove_all_write_access(
struct domain *d, unsigned long gpfn, unsigned long gmfn);
@@ -183,7 +196,7 @@
unlikely(d != _domain) ) /* Wrong owner? */
{
if ( !_shadow_mode_refcounts(domain) )
- DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%08x\n",
+ DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info "\n",
page_to_pfn(page), domain, unpickle_domptr(d),
x, page->u.inuse.type_info);
return 0;
@@ -200,7 +213,7 @@
}
void put_page_type(struct pfn_info *page);
-int get_page_type(struct pfn_info *page, u32 type);
+int get_page_type(struct pfn_info *page, unsigned long type);
int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
@@ -213,7 +226,7 @@
static inline int get_page_and_type(struct pfn_info *page,
struct domain *domain,
- u32 type)
+ unsigned long type)
{
int rc = get_page(page, domain);
@@ -300,6 +313,9 @@
unsigned int prev_nr_updates;
/* Exec domain which created writable mapping. */
struct vcpu *vcpu;
+ /* EIP of the address which took the original write fault
+ used for stats collection only */
+ unsigned long eip;
};
#define PTWR_PT_ACTIVE 0
@@ -311,7 +327,8 @@
int ptwr_init(struct domain *);
void ptwr_destroy(struct domain *);
void ptwr_flush(struct domain *, const int);
-int ptwr_do_page_fault(struct domain *, unsigned long);
+int ptwr_do_page_fault(struct domain *, unsigned long,
+ struct cpu_user_regs *);
int revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
void cleanup_writable_pagetable(struct domain *d);
@@ -334,6 +351,18 @@
#define _audit_domain(_d, _f) ((void)0)
#define audit_domain(_d) ((void)0)
#define audit_domains() ((void)0)
+
+#endif
+
+#ifdef PERF_ARRAYS
+/* ptwr EIP statistics hooks; these compile to no-ops without PERF_ARRAYS. */
+void ptwr_eip_stat_reset(void);
+void ptwr_eip_stat_print(void);
+
+#else
+
+#define ptwr_eip_stat_reset() ((void)0)
+#define ptwr_eip_stat_print() ((void)0)
#endif
@@ -345,8 +374,14 @@
* Caller must own d's BIGLOCK, is responsible for flushing the TLB, and must
* hold a reference to the page.
*/
-int update_grant_va_mapping(unsigned long va,
- l1_pgentry_t _nl1e,
- struct domain *d,
- struct vcpu *v);
+int update_grant_va_mapping(
+ unsigned long va, l1_pgentry_t _nl1e,
+ struct domain *d, struct vcpu *v);
+int update_grant_pte_mapping(
+ unsigned long pte_addr, l1_pgentry_t _nl1e,
+ struct domain *d, struct vcpu *v);
+int clear_grant_va_mapping(unsigned long addr, unsigned long frame);
+int clear_grant_pte_mapping(
+ unsigned long addr, unsigned long frame, struct domain *d);
+
#endif /* __ASM_X86_MM_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/page.h Thu Aug 25 22:53:20 2005
@@ -189,6 +189,9 @@
#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
#define pfn_valid(_pfn) ((_pfn) < max_page)
+#define pfn_to_phys(pfn) ((physaddr_t)(pfn) << PAGE_SHIFT)
+#define phys_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+
/* High table entries are reserved by the hypervisor. */
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
@@ -208,20 +211,21 @@
+ DOMAIN_ENTRIES_PER_L4_PAGETABLE)
#endif
-#define linear_l1_table \
+#define LINEAR_PT_OFFSET (LINEAR_PT_VIRT_START & VADDR_MASK)
+#define linear_l1_table \
((l1_pgentry_t *)(LINEAR_PT_VIRT_START))
-#define __linear_l2_table \
- ((l2_pgentry_t *)(LINEAR_PT_VIRT_START + \
- (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0))))
-#define __linear_l3_table \
- ((l3_pgentry_t *)(LINEAR_PT_VIRT_START + \
- (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0)) + \
- (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<1))))
-#define __linear_l4_table \
- ((l4_pgentry_t *)(LINEAR_PT_VIRT_START + \
- (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0)) + \
- (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<1)) + \
- (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<2))))
+#define __linear_l2_table \
+ ((l2_pgentry_t *)(LINEAR_PT_VIRT_START + \
+ (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0))))
+#define __linear_l3_table \
+ ((l3_pgentry_t *)(LINEAR_PT_VIRT_START + \
+ (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) + \
+ (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<1))))
+#define __linear_l4_table \
+ ((l4_pgentry_t *)(LINEAR_PT_VIRT_START + \
+ (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) + \
+ (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<1)) + \
+ (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<2))))
#define linear_pg_table linear_l1_table
#define linear_l2_table(_ed) ((_ed)->arch.guest_vtable)
@@ -279,13 +283,9 @@
static __inline__ int get_order(unsigned long size)
{
int order;
-
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
+ size = (size-1) >> PAGE_SHIFT;
+ for ( order = 0; size; order++ )
size >>= 1;
- order++;
- } while (size);
return order;
}
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/shadow.h Thu Aug 25 22:53:20 2005
@@ -483,9 +483,9 @@
#ifndef NDEBUG
else if ( mfn < max_page )
{
- SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (dom %p)",
+ SH_VLOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (dom %p)",
mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
- SH_LOG("dom=%p caf=%08x taf=%08x",
+ SH_VLOG("dom=%p caf=%08x taf=%" PRtype_info,
page_get_owner(&frame_table[mfn]),
frame_table[mfn].count_info,
frame_table[mfn].u.inuse.type_info );
@@ -602,14 +602,14 @@
/* XXX This needs more thought... */
printk("%s: needing to call shadow_remove_all_access for mfn=%lx\n",
__func__, page_to_pfn(page));
- printk("Before: mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+ printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
page->count_info, page->u.inuse.type_info);
shadow_lock(d);
shadow_remove_all_access(d, page_to_pfn(page));
shadow_unlock(d);
- printk("After: mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+ printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
page->count_info, page->u.inuse.type_info);
}
@@ -648,7 +648,7 @@
if ( unlikely(nx == 0) )
{
- printk("get_shadow_ref overflow, gmfn=%x smfn=%lx\n",
+ printk("get_shadow_ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
frame_table[smfn].u.inuse.type_info & PGT_mfn_mask,
smfn);
BUG();
@@ -678,7 +678,8 @@
if ( unlikely(x == 0) )
{
- printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%08x\n",
+ printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%"
+ PRtype_info "\n",
smfn,
frame_table[smfn].count_info,
frame_table[smfn].u.inuse.type_info);
@@ -735,7 +736,7 @@
if ( unlikely(!VALID_MFN(gmfn)) )
{
- SH_LOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
+ SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
*spte_p = l1e_empty();
return 0;
}
@@ -769,7 +770,7 @@
if ( unlikely(!VALID_MFN(mfn)) )
{
- SH_LOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
+ SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
*spte_p = l1e_empty();
return 0;
}
@@ -1200,7 +1201,7 @@
#ifndef NDEBUG
if ( ___shadow_status(d, gpfn, stype) != 0 )
{
- printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%x "
+ printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%" PRtype_info
" "
"mfn_out_of_sync(gmfn)=%d mfn_is_page_table(gmfn)=%d\n",
d->domain_id, gpfn, gmfn, stype,
frame_table[gmfn].count_info,
@@ -1471,7 +1472,7 @@
/* We need to allocate a new node. Ensure the quicklist is non-empty. */
if ( unlikely(d->arch.shadow_ht_free == NULL) )
{
- SH_LOG("Allocate more shadow hashtable blocks.");
+ SH_VLOG("Allocate more shadow hashtable blocks.");
extra = xmalloc_bytes(
sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/time.h Thu Aug 25 22:53:20 2005
@@ -7,4 +7,7 @@
extern void calibrate_tsc_bp(void);
extern void calibrate_tsc_ap(void);
+struct domain;
+extern void init_domain_time(struct domain *d);
+
#endif /* __X86_TIME_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/types.h
--- a/xen/include/asm-x86/types.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/types.h Thu Aug 25 22:53:20 2005
@@ -38,13 +38,16 @@
typedef unsigned long long u64;
#if defined(CONFIG_X86_PAE)
typedef u64 physaddr_t;
+#define PRIphysaddr "016llx"
#else
-typedef u32 physaddr_t;
+typedef unsigned long physaddr_t;
+#define PRIphysaddr "08lx"
#endif
#elif defined(__x86_64__)
typedef signed long s64;
typedef unsigned long u64;
-typedef u64 physaddr_t;
+typedef unsigned long physaddr_t;
+#define PRIphysaddr "016lx"
#endif
typedef unsigned long size_t;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/uaccess.h
--- a/xen/include/asm-x86/uaccess.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/uaccess.h Thu Aug 25 22:53:20 2005
@@ -125,22 +125,20 @@
__pu_err; \
})
-#define __get_user_nocheck(x,ptr,size) \
-({ \
- long __gu_err, __gu_val; \
- __get_user_size(__gu_val,(ptr),(size),__gu_err,-EFAULT);\
- (x) = (__typeof__(*(ptr)))__gu_val; \
- __gu_err; \
+#define __get_user_nocheck(x,ptr,size) \
+({ \
+ long __gu_err; \
+ __get_user_size((x),(ptr),(size),__gu_err,-EFAULT); \
+ __gu_err; \
})
-#define __get_user_check(x,ptr,size) \
-({ \
- long __gu_err, __gu_val; \
- __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
- __get_user_size(__gu_val,__gu_addr,(size),__gu_err,-EFAULT); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
- if (!__addr_ok(__gu_addr)) __gu_err = -EFAULT; \
- __gu_err; \
+#define __get_user_check(x,ptr,size) \
+({ \
+ long __gu_err; \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __get_user_size((x),__gu_addr,(size),__gu_err,-EFAULT); \
+ if (!__addr_ok(__gu_addr)) __gu_err = -EFAULT; \
+ __gu_err; \
})
struct __large_struct { unsigned long buf[100]; };
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/vmx.h Thu Aug 25 22:53:20 2005
@@ -31,10 +31,11 @@
extern void vmx_asm_vmexit_handler(struct cpu_user_regs);
extern void vmx_asm_do_resume(void);
extern void vmx_asm_do_launch(void);
-extern void vmx_intr_assist(struct vcpu *d);
+extern void vmx_intr_assist(void);
extern void arch_vmx_do_launch(struct vcpu *);
extern void arch_vmx_do_resume(struct vcpu *);
+extern void arch_vmx_do_relaunch(struct vcpu *);
extern int vmcs_size;
extern unsigned int cpu_rev;
@@ -354,7 +355,7 @@
}
/* Make sure that xen intercepts any FP accesses from current */
-static inline void vmx_stts()
+static inline void vmx_stts(void)
{
unsigned long cr0;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/vmx_vmcs.h Thu Aug 25 22:53:20 2005
@@ -28,10 +28,10 @@
extern void stop_vmx(void);
#if defined (__x86_64__)
-extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
+extern void vmx_load_msrs(struct vcpu *n);
void vmx_restore_msrs(struct vcpu *d);
#else
-#define vmx_load_msrs(_p, _n) ((void)0)
+#define vmx_load_msrs(_n) ((void)0)
#define vmx_restore_msrs(_v) ((void)0)
#endif
@@ -93,6 +93,7 @@
void vmx_do_launch(struct vcpu *);
void vmx_do_resume(struct vcpu *);
+void vmx_set_host_env(struct vcpu *);
struct vmcs_struct *alloc_vmcs(void);
void free_vmcs(struct vmcs_struct *);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/x86_32/page-3level.h Thu Aug 25 22:53:20 2005
@@ -63,7 +63,7 @@
/* Extract flags into 32-bit integer, or turn 32-bit flags into a pte mask. */
#define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
-#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF))
+#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
#define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */
#define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/x86_32/uaccess.h
--- a/xen/include/asm-x86/x86_32/uaccess.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/x86_32/uaccess.h Thu Aug 25 22:53:20 2005
@@ -22,7 +22,11 @@
#define array_access_ok(addr,count,size) \
(likely(count < (~0UL/size)) && access_ok(addr,count*size))
+/* Undefined function to catch size mismatches on 64-bit get_user/put_user. */
+extern void __uaccess_var_not_u64(void);
+
#define __put_user_u64(x, addr, retval, errret) \
+ if (sizeof(x) != 8) __uaccess_var_not_u64(); \
__asm__ __volatile__( \
"1: movl %%eax,0(%2)\n" \
"2: movl %%edx,4(%2)\n" \
@@ -52,6 +56,7 @@
} while (0)
#define __get_user_u64(x, addr, retval, errret) \
+ if (sizeof(x) != 8) __uaccess_var_not_u64(); \
__asm__ __volatile__( \
"1: movl 0(%2),%%eax\n" \
"2: movl 4(%2),%%edx\n" \
@@ -67,7 +72,7 @@
" .long 1b,4b\n" \
" .long 2b,4b\n" \
".previous" \
- : "=r" (retval), "=A" (x) \
+ : "=r" (retval), "=&A" (x) \
: "r" (addr), "i"(errret), "0"(retval))
#define __get_user_size(x,ptr,size,retval,errret) \
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/x86_64/page.h Thu Aug 25 22:53:20 2005
@@ -42,7 +42,8 @@
#endif /* !__ASSEMBLY__ */
/* Given a virtual address, get an entry offset into a linear page table. */
-#define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> PAGE_SHIFT)
+#define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT)
+#define l2_linear_offset(_a) (((_a) & VADDR_MASK) >> L2_PAGETABLE_SHIFT)
#define is_guest_l1_slot(_s) (1)
#define is_guest_l2_slot(_t, _s) (1)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/arch-ia64.h Thu Aug 25 22:53:20 2005
@@ -12,9 +12,6 @@
#define MAX_VIRT_CPUS 1
#ifndef __ASSEMBLY__
-
-/* NB. Both the following are 64 bits each. */
-typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */
#define MAX_NR_SECTION 32 // at most 32 memory holes
typedef struct {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/arch-x86_32.h Thu Aug 25 22:53:20 2005
@@ -63,9 +63,6 @@
#ifndef __ASSEMBLY__
-/* NB. Both the following are 32 bits each. */
-typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */
-
/*
* Send an array of these to HYPERVISOR_set_trap_table()
*/
@@ -74,10 +71,10 @@
#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
typedef struct trap_info {
- u8 vector; /* exception vector */
- u8 flags; /* 0-3: privilege level; 4: clear event enable? */
- u16 cs; /* code selector */
- memory_t address; /* code address */
+ u8 vector; /* exception vector */
+ u8 flags; /* 0-3: privilege level; 4: clear event enable? */
+ u16 cs; /* code selector */
+ unsigned long address; /* code offset */
} trap_info_t;
typedef struct cpu_user_regs {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/arch-x86_64.h Thu Aug 25 22:53:20 2005
@@ -103,9 +103,6 @@
/* Bottom of switch_to_user stack frame. */
};
-/* NB. Both the following are 64 bits each. */
-typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */
-
/*
* Send an array of these to HYPERVISOR_set_trap_table().
* N.B. As in x86/32 mode, the privilege level specifies which modes may enter
@@ -121,10 +118,10 @@
#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
typedef struct trap_info {
- u8 vector; /* exception vector */
- u8 flags; /* 0-3: privilege level; 4: clear event enable? */
- u16 cs; /* code selector */
- memory_t address; /* code address */
+ u8 vector; /* exception vector */
+ u8 flags; /* 0-3: privilege level; 4: clear event enable? */
+ u16 cs; /* code selector */
+ unsigned long address; /* code offset */
} trap_info_t;
typedef struct cpu_user_regs {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/dom0_ops.h Thu Aug 25 22:53:20 2005
@@ -19,7 +19,7 @@
* This makes sure that old versions of dom0 tools will stop working in a
* well-defined way (rather than crashing the machine, for instance).
*/
-#define DOM0_INTERFACE_VERSION 0xAAAA100E
+#define DOM0_INTERFACE_VERSION 0xAAAA1010
/************************************************************************/
@@ -27,10 +27,10 @@
typedef struct {
/* IN variables. */
domid_t domain;
- memory_t max_pfns;
+ unsigned long max_pfns;
void *buffer;
/* OUT variables. */
- memory_t num_pfns;
+ unsigned long num_pfns;
} dom0_getmemlist_t;
#define DOM0_SCHEDCTL 6
@@ -83,9 +83,9 @@
#define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code. */
#define DOMFLAGS_SHUTDOWNSHIFT 16
u32 flags;
- memory_t tot_pages;
- memory_t max_pages;
- memory_t shared_info_frame; /* MFN of shared_info struct */
+ unsigned long tot_pages;
+ unsigned long max_pages;
+ unsigned long shared_info_frame; /* MFN of shared_info struct */
u64 cpu_time;
u32 n_vcpu;
s32 vcpu_to_cpu[MAX_VIRT_CPUS]; /* current mapping */
@@ -131,14 +131,14 @@
} dom0_debug_t;
/*
- * Set clock such that it would read <secs,usecs> after 00:00:00 UTC,
+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
* 1 January, 1970 if the current system time was <system_time>.
*/
#define DOM0_SETTIME 17
typedef struct {
/* IN variables. */
u32 secs;
- u32 usecs;
+ u32 nsecs;
u64 system_time;
} dom0_settime_t;
@@ -155,7 +155,7 @@
typedef struct {
/* IN variables. */
- memory_t pfn; /* Machine page frame number to query. */
+ unsigned long pfn; /* Machine page frame number to query. */
domid_t domain; /* To which domain does the frame belong? */
/* OUT variables. */
/* Is the page PINNED to a type? */
@@ -197,7 +197,7 @@
unsigned long cpu_mask;
u32 evt_mask;
/* OUT variables */
- memory_t mach_addr;
+ unsigned long buffer_mfn;
u32 size;
} dom0_tbufcontrol_t;
@@ -211,8 +211,8 @@
u32 sockets_per_node;
u32 nr_nodes;
u32 cpu_khz;
- memory_t total_pages;
- memory_t free_pages;
+ unsigned long total_pages;
+ unsigned long free_pages;
} dom0_physinfo_t;
/*
@@ -252,7 +252,7 @@
u32 op;
unsigned long *dirty_bitmap; /* pointer to locked buffer */
/* IN/OUT variables. */
- memory_t pages; /* size of buffer, updated with actual size */
+ unsigned long pages; /* size of buffer, updated with actual size */
/* OUT variables. */
dom0_shadow_control_stats_t stats;
} dom0_shadow_control_t;
@@ -260,15 +260,15 @@
#define DOM0_SETDOMAINMAXMEM 28
typedef struct {
/* IN variables. */
- domid_t domain;
- memory_t max_memkb;
+ domid_t domain;
+ unsigned long max_memkb;
} dom0_setdomainmaxmem_t;
#define DOM0_GETPAGEFRAMEINFO2 29 /* batched interface */
typedef struct {
/* IN variables. */
- domid_t domain;
- memory_t num;
+ domid_t domain;
+ unsigned long num;
/* IN/OUT variables. */
unsigned long *array;
} dom0_getpageframeinfo2_t;
@@ -283,12 +283,12 @@
#define DOM0_ADD_MEMTYPE 31
typedef struct {
/* IN variables. */
- memory_t pfn;
- memory_t nr_pfns;
- u32 type;
- /* OUT variables. */
- u32 handle;
- u32 reg;
+ unsigned long pfn;
+ unsigned long nr_pfns;
+ u32 type;
+ /* OUT variables. */
+ u32 handle;
+ u32 reg;
} dom0_add_memtype_t;
/*
@@ -311,8 +311,8 @@
/* IN variables. */
u32 reg;
/* OUT variables. */
- memory_t pfn;
- memory_t nr_pfns;
+ unsigned long pfn;
+ unsigned long nr_pfns;
u32 type;
} dom0_read_memtype_t;
@@ -361,10 +361,10 @@
typedef struct {
/* IN variables. */
domid_t first_domain;
- memory_t max_domains;
+ unsigned int max_domains;
dom0_getdomaininfo_t *buffer;
/* OUT variables. */
- memory_t num_domains;
+ unsigned int num_domains;
} dom0_getdomaininfolist_t;
#define DOM0_PLATFORM_QUIRK 39
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/grant_table.h Thu Aug 25 22:53:20 2005
@@ -142,7 +142,10 @@
* 1. If GNTPIN_map_for_dev is specified then <dev_bus_addr> is the address
* via which I/O devices may access the granted frame.
* 2. If GNTPIN_map_for_host is specified then a mapping will be added at
- * virtual address <host_virt_addr> in the current address space.
+ * either a host virtual address in the current address space, or at
+ * a PTE at the specified machine address. The type of mapping to
+ * perform is selected through the GNTMAP_contains_pte flag, and the
+ * address is specified in <host_addr>.
* 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
* host mapping is destroyed by other means then it is *NOT* guaranteed
* to be accounted to the correct grant reference!
@@ -150,18 +153,18 @@
#define GNTTABOP_map_grant_ref 0
typedef struct gnttab_map_grant_ref {
/* IN parameters. */
- memory_t host_virt_addr;
+ u64 host_addr;
domid_t dom;
grant_ref_t ref;
u16 flags; /* GNTMAP_* */
/* OUT parameters. */
s16 handle; /* +ve: handle; -ve: GNTST_* */
- memory_t dev_bus_addr;
+ u64 dev_bus_addr;
} gnttab_map_grant_ref_t;
/*
* GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
- * tracked by <handle>. If <host_virt_addr> or <dev_bus_addr> is zero, that
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
* field is ignored. If non-zero, they must refer to a device/host mapping
* that is tracked by <handle>
* NOTES:
@@ -173,14 +176,12 @@
#define GNTTABOP_unmap_grant_ref 1
typedef struct gnttab_unmap_grant_ref {
/* IN parameters. */
- memory_t host_virt_addr;
- memory_t dev_bus_addr;
+ u64 host_addr;
+ u64 dev_bus_addr;
u16 handle;
/* OUT parameters. */
s16 status; /* GNTST_* */
} gnttab_unmap_grant_ref_t;
-
-#define GNTUNMAP_DEV_FROM_VIRT (~0U)
/*
* GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
@@ -220,7 +221,7 @@
*/
#define GNTTABOP_donate 4
typedef struct {
- memory_t mfn; /* 0 */
+ unsigned long mfn; /* 0 */
domid_t domid; /* 4 */
u16 handle; /* 8 */
s16 status; /* 10: GNTST_* */
@@ -247,10 +248,18 @@
#define _GNTMAP_application_map (3)
#define GNTMAP_application_map (1<<_GNTMAP_application_map)
+ /*
+ * GNTMAP_contains_pte subflag:
+ * 0 => This map request contains a host virtual address.
+ * 1 => This map request contains the machine address of the PTE to update.
+ */
+#define _GNTMAP_contains_pte (4)
+#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte)
+
/*
* Values for error status returns. All errors are -ve.
*/
-#define GNTST_okay (0)
+#define GNTST_okay (0) /* Normal return. */
#define GNTST_general_error (-1) /* General undefined error. */
#define GNTST_bad_domain (-2) /* Unrecognsed domain id. */
#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/io/blkif.h
--- a/xen/include/public/io/blkif.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/io/blkif.h Thu Aug 25 22:53:20 2005
@@ -18,7 +18,6 @@
#define BLKIF_OP_READ 0
#define BLKIF_OP_WRITE 1
-#define BLKIF_OP_PROBE 2
/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */
#define BLKIF_RING_SIZE 64
@@ -33,28 +32,22 @@
typedef struct blkif_request {
u8 operation; /* BLKIF_OP_??? */
u8 nr_segments; /* number of segments */
- blkif_vdev_t device; /* only for read/write requests */
+ blkif_vdev_t handle; /* only for read/write requests */
unsigned long id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
/* @f_a_s[4:0]=last_sect ; @f_a_s[9:5]=first_sect */
-#ifdef CONFIG_XEN_BLKDEV_GRANT
/* @f_a_s[:16]= grant reference (16 bits) */
-#else
- /* @f_a_s[:12]=@frame: machine page frame number. */
-#endif
/* @first_sect: first sector in frame to transfer (inclusive). */
/* @last_sect: last sector in frame to transfer (inclusive). */
unsigned long frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} blkif_request_t;
-#define blkif_fas(_addr, _fs, _ls) ((addr)|((_fs)<<5)|(_ls))
+#define blkif_fas(_addr, _fs, _ls) ((_addr)|((_fs)<<5)|(_ls))
#define blkif_first_sect(_fas) (((_fas)>>5)&31)
#define blkif_last_sect(_fas) ((_fas)&31)
-#ifdef CONFIG_XEN_BLKDEV_GRANT
#define blkif_fas_from_gref(_gref, _fs, _ls) (((_gref)<<16)|((_fs)<<5)|(_ls))
#define blkif_gref_from_fas(_fas) ((_fas)>>16)
-#endif
typedef struct blkif_response {
unsigned long id; /* copied from request */
@@ -65,37 +58,17 @@
#define BLKIF_RSP_ERROR -1 /* non-specific 'error' */
#define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */
+#define BLKIF_MAJOR(dev) ((dev)>>8)
+#define BLKIF_MINOR(dev) ((dev) & 0xff)
+
/*
* Generate blkif ring structures and types.
*/
DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t);
-/*
- * BLKIF_OP_PROBE:
- * The request format for a probe request is constrained as follows:
- * @operation == BLKIF_OP_PROBE
- * @nr_segments == size of probe buffer in pages
- * @device == unused (zero)
- * @id == any value (echoed in response message)
- * @sector_num == unused (zero)
- * @frame_and_sects == list of page-sized buffers.
- * (i.e., @first_sect == 0, @last_sect == 7).
- *
- * The response is a list of vdisk_t elements copied into the out-of-band
- * probe buffer. On success the response status field contains the number
- * of vdisk_t elements.
- */
-
#define VDISK_CDROM 0x1
#define VDISK_REMOVABLE 0x2
#define VDISK_READONLY 0x4
-typedef struct vdisk {
- blkif_sector_t capacity; /* Size in terms of 512-byte sectors. */
- blkif_vdev_t device; /* Device number (opaque 16 bit value). */
- u16 info; /* Device type and flags (VDISK_*). */
- u16 sector_size; /* Minimum alignment for requests. */
-} vdisk_t; /* 16 bytes */
-
#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/io/domain_controller.h
--- a/xen/include/public/io/domain_controller.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/io/domain_controller.h Thu Aug 25 22:53:20 2005
@@ -139,7 +139,7 @@
*/
typedef struct blkif_fe_interface_connect {
u32 handle;
- memory_t shmem_frame;
+ unsigned long shmem_frame;
int shmem_ref;
} blkif_fe_interface_connect_t;
@@ -249,7 +249,7 @@
/* IN */
domid_t domid; /* Domain attached to new interface. */
u32 blkif_handle; /* Domain-specific interface handle. */
- memory_t shmem_frame; /* Page cont. shared comms window. */
+ unsigned long shmem_frame;/* Page cont. shared comms window. */
int shmem_ref; /* Grant table reference. */
u32 evtchn; /* Event channel for notifications. */
/* OUT */
@@ -364,9 +364,11 @@
* STATUS_CONNECTED message.
*/
typedef struct netif_fe_interface_connect {
- u32 handle;
- memory_t tx_shmem_frame;
- memory_t rx_shmem_frame;
+ u32 handle;
+ unsigned long tx_shmem_frame;
+ int tx_shmem_ref;
+ unsigned long rx_shmem_frame;
+ int rx_shmem_ref;
} netif_fe_interface_connect_t;
/*
@@ -484,11 +486,13 @@
*/
typedef struct netif_be_connect {
/* IN */
- domid_t domid; /* Domain attached to new interface. */
- u32 netif_handle; /* Domain-specific interface handle. */
- memory_t tx_shmem_frame; /* Page cont. tx shared comms window. */
- memory_t rx_shmem_frame; /* Page cont. rx shared comms window. */
- u16 evtchn; /* Event channel for notifications. */
+ domid_t domid; /* Domain attached to new interface. */
+ u32 netif_handle; /* Domain-specific interface handle. */
+ unsigned long tx_shmem_frame;/* Page cont. tx shared comms window. */
+ int tx_shmem_ref; /* Grant reference for above */
+ unsigned long rx_shmem_frame;/* Page cont. rx shared comms window. */
+ int rx_shmem_ref; /* Grant reference for above */
+ u16 evtchn; /* Event channel for notifications. */
/* OUT */
u32 status;
} netif_be_connect_t;
@@ -573,7 +577,7 @@
* STATUS_CONNECTED message.
*/
typedef struct usbif_fe_interface_connect {
- memory_t shmem_frame;
+ unsigned long shmem_frame;
} usbif_fe_interface_connect_t;
/*
@@ -656,7 +660,7 @@
typedef struct usbif_be_connect {
/* IN */
domid_t domid; /* Domain attached to new interface. */
- memory_t shmem_frame; /* Page cont. shared comms window. */
+ unsigned long shmem_frame;/* Page cont. shared comms window. */
u32 evtchn; /* Event channel for notifications. */
u32 bandwidth; /* Bandwidth allocated for isoch / int - us
* per 1ms frame (ie between 0 and 900 or 800
@@ -776,7 +780,7 @@
#define PDB_CONNECTION_STATUS_UP 1
#define PDB_CONNECTION_STATUS_DOWN 2
u32 status;
- memory_t ring; /* status: UP */
+ unsigned long ring; /* status: UP */
u32 evtchn; /* status: UP */
} pdb_connection_t, *pdb_connection_p;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/io/netif.h Thu Aug 25 22:53:20 2005
@@ -10,7 +10,7 @@
#define __XEN_PUBLIC_IO_NETIF_H__
typedef struct netif_tx_request {
- memory_t addr; /* Machine address of packet. */
+ unsigned long addr; /* Machine address of packet. */
u16 csum_blank:1; /* Proto csum field blank? */
u16 id:15; /* Echoed in response message. */
u16 size; /* Packet size in bytes. */
@@ -32,7 +32,7 @@
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
u32 addr; /* 0: Offset in page of start of received packet */
#else
- memory_t addr; /* Machine address of packet. */
+ unsigned long addr; /* Machine address of packet. */
#endif
u16 csum_valid:1; /* Protocol checksum is validated? */
u16 id:15;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/physdev.h
--- a/xen/include/public/physdev.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/physdev.h Thu Aug 25 22:53:20 2005
@@ -27,8 +27,8 @@
typedef struct physdevop_set_iobitmap {
/* IN */
- memory_t bitmap;
- u32 nr_ports;
+ u8 *bitmap;
+ u32 nr_ports;
} physdevop_set_iobitmap_t;
typedef struct physdevop_apic {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/trace.h
--- a/xen/include/public/trace.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/trace.h Thu Aug 25 22:53:20 2005
@@ -9,11 +9,21 @@
#define __XEN_PUBLIC_TRACE_H__
/* Trace classes */
-#define TRC_GEN 0x00010000 /* General trace */
-#define TRC_SCHED 0x00020000 /* Xen Scheduler trace */
-#define TRC_DOM0OP 0x00040000 /* Xen DOM0 operation trace */
-#define TRC_VMX 0x00080000 /* Xen VMX trace */
-#define TRC_ALL 0xffff0000
+#define TRC_CLS_SHIFT 16
+#define TRC_GEN 0x0001f000 /* General trace */
+#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */
+#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */
+#define TRC_VMX 0x0008f000 /* Xen VMX trace */
+#define TRC_ALL 0xfffff000
+
+/* Trace subclasses */
+#define TRC_SUBCLS_SHIFT 12
+/* trace subclasses for VMX */
+#define TRC_VMXEXIT 0x00081000 /* VMX exit trace */
+#define TRC_VMXTIMER 0x00082000 /* VMX timer trace */
+#define TRC_VMXINT 0x00084000 /* VMX interrupt trace */
+#define TRC_VMXIO 0x00088000 /* VMX io emulation trace */
+
/* Trace events per class */
@@ -31,9 +41,13 @@
#define TRC_SCHED_T_TIMER_FN (TRC_SCHED + 12)
#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED + 13)
-#define TRC_VMX_VMEXIT (TRC_VMX + 1)
-#define TRC_VMX_VECTOR (TRC_VMX + 2)
-#define TRC_VMX_INT (TRC_VMX + 3)
+/* trace events per subclass */
+#define TRC_VMX_VMEXIT (TRC_VMXEXIT + 1)
+#define TRC_VMX_VECTOR (TRC_VMXEXIT + 2)
+
+#define TRC_VMX_TIMER_INTR (TRC_VMXTIMER + 1)
+
+#define TRC_VMX_INT (TRC_VMXINT + 1)
/* This structure represents a single trace buffer record. */
struct t_rec {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/xen.h
--- a/xen/include/public/xen.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/xen.h Thu Aug 25 22:53:20 2005
@@ -171,9 +171,9 @@
unsigned int cmd;
union {
/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR, REASSIGN_PAGE */
- memory_t mfn;
+ unsigned long mfn;
/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
- memory_t linear_addr;
+ unsigned long linear_addr;
};
union {
/* SET_LDT */
@@ -203,6 +203,7 @@
#define SCHEDOP_shutdown 2 /* Stop executing this domain. */
#define SCHEDOP_vcpu_down 3 /* make target VCPU not-runnable. */
#define SCHEDOP_vcpu_up 4 /* make target VCPU runnable. */
+#define SCHEDOP_vcpu_pickle 5 /* save a vcpu's context to memory. */
#define SCHEDOP_cmdmask 255 /* 8-bit command. */
#define SCHEDOP_reasonshift 8 /* 8-bit reason code. (SCHEDOP_shutdown) */
#define SCHEDOP_vcpushift 8 /* 8-bit VCPU target. (SCHEDOP_up|down) */
@@ -331,14 +332,15 @@
typedef struct vcpu_time_info {
/*
- * The following values are updated periodically (and not necessarily
- * atomically!). The guest OS detects this because 'time_version1' is
- * incremented just before updating these values, and 'time_version2' is
- * incremented immediately after. See the Xen-specific Linux code for an
- * example of how to read these values safely (arch/xen/kernel/time.c).
+ * Updates to the following values are preceded and followed by an
+ * increment of 'version'. The guest can therefore detect updates by
+ * looking for changes to 'version'. If the least-significant bit of
+ * the version number is set then an update is in progress and the guest
+ * must wait to read a consistent set of values.
+ * The correct way to interact with the version number is similar to
+ * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry.
*/
- u32 time_version1;
- u32 time_version2;
+ u32 version;
u64 tsc_timestamp; /* TSC at last update of time vals. */
u64 system_time; /* Time, in nanosecs, since boot. */
/*
@@ -400,8 +402,9 @@
* Wallclock time: updated only by control software. Guests should base
* their gettimeofday() syscall on this wallclock-base value.
*/
- u32 wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
- u32 wc_usec; /* Usecs 00:00:00 UTC, Jan 1, 1970. */
+ u32 wc_version; /* Version counter: see vcpu_time_info_t. */
+ u32 wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
+ u32 wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
arch_shared_info_t arch;
@@ -435,18 +438,18 @@
#define MAX_GUEST_CMDLINE 1024
typedef struct start_info {
/* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
- memory_t nr_pages; /* Total pages allocated to this domain. */
- memory_t shared_info; /* MACHINE address of shared info struct. */
+ unsigned long nr_pages; /* Total pages allocated to this domain. */
+ unsigned long shared_info;/* MACHINE address of shared info struct. */
u32 flags; /* SIF_xxx flags. */
u16 domain_controller_evtchn;
/* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
- memory_t pt_base; /* VIRTUAL address of page directory. */
- memory_t nr_pt_frames; /* Number of bootstrap p.t. frames. */
- memory_t mfn_list; /* VIRTUAL address of page-frame list. */
- memory_t mod_start; /* VIRTUAL address of pre-loaded module. */
- memory_t mod_len; /* Size (bytes) of pre-loaded module. */
+ unsigned long pt_base; /* VIRTUAL address of page directory. */
+ unsigned long nr_pt_frames;/* Number of bootstrap p.t. frames. */
+ unsigned long mfn_list; /* VIRTUAL address of page-frame list. */
+ unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
+ unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
s8 cmd_line[MAX_GUEST_CMDLINE];
- memory_t store_mfn; /* MACHINE page number of shared page. */
+ unsigned long store_mfn; /* MACHINE page number of shared page. */
u16 store_evtchn; /* Event channel for store communication. */
} start_info_t;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/ac_timer.h
--- a/xen/include/xen/ac_timer.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/ac_timer.h Thu Aug 25 22:53:20 2005
@@ -10,6 +10,7 @@
#include <xen/spinlock.h>
#include <xen/time.h>
+#include <xen/string.h>
struct ac_timer {
/* System time expiry value (nanoseconds since boot). */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/domain.h Thu Aug 25 22:53:20 2005
@@ -15,7 +15,9 @@
extern void arch_do_boot_vcpu(struct vcpu *v);
extern int arch_set_info_guest(
- struct vcpu *d, struct vcpu_guest_context *c);
+ struct vcpu *v, struct vcpu_guest_context *c);
+
+extern void vcpu_migrate_cpu(struct vcpu *v, int newcpu);
extern void free_perdomain_pt(struct domain *d);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/event.h
--- a/xen/include/xen/event.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/event.h Thu Aug 25 22:53:20 2005
@@ -26,30 +26,14 @@
{
struct domain *d = v->domain;
shared_info_t *s = d->shared_info;
- int running;
- /* These three operations must happen in strict order. */
+ /* These four operations must happen in strict order. */
if ( !test_and_set_bit(port, &s->evtchn_pending[0]) &&
!test_bit (port, &s->evtchn_mask[0]) &&
- !test_and_set_bit(port>>5, &v->vcpu_info->evtchn_pending_sel) )
+ !test_and_set_bit(port>>5, &v->vcpu_info->evtchn_pending_sel) &&
+ !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
{
- /* The VCPU pending flag must be set /after/ update to evtchn-pend. */
- set_bit(0, &v->vcpu_info->evtchn_upcall_pending);
evtchn_notify(v);
-
- /*
- * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
- * pending flag. These values may fluctuate (after all, we hold no
- * locks) but the key insight is that each change will cause
- * evtchn_upcall_pending to be polled.
- *
- * NB2. We save VCPUF_running across the unblock to avoid a needless
- * IPI for domains that we IPI'd to unblock.
- */
- running = test_bit(_VCPUF_running, &v->vcpu_flags);
- vcpu_unblock(v);
- if ( running )
- smp_send_event_check_cpu(v->processor);
}
}
@@ -73,8 +57,9 @@
*/
extern void send_guest_pirq(struct domain *d, int pirq);
-#define event_pending(_d) \
- ((_d)->vcpu_info->evtchn_upcall_pending && \
- !(_d)->vcpu_info->evtchn_upcall_mask)
+/* Note: Bitwise operations result in fast code with no branches. */
+#define event_pending(v) \
+ ((v)->vcpu_info->evtchn_upcall_pending & \
+ ~(v)->vcpu_info->evtchn_upcall_mask)
#endif /* __XEN_EVENT_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/grant_table.h
--- a/xen/include/xen/grant_table.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/grant_table.h Thu Aug 25 22:53:20 2005
@@ -53,19 +53,20 @@
#define ORDER_GRANT_FRAMES 2
#define NR_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES)
-#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
-
+#define NR_GRANT_ENTRIES \
+ ((NR_GRANT_FRAMES << PAGE_SHIFT) / sizeof(grant_entry_t))
/*
* Tracks a mapping of another domain's grant reference. Each domain has a
* table of these, indexes into which are returned as a 'mapping handle'.
*/
typedef struct {
- u16 ref_and_flags; /* 0-2: GNTMAP_* ; 3-15: grant ref */
+ u16 ref_and_flags; /* 0-4: GNTMAP_* ; 5-15: grant ref */
domid_t domid; /* granting domain */
} grant_mapping_t;
-#define MAPTRACK_GNTMAP_MASK 7
-#define MAPTRACK_REF_SHIFT 3
+#define MAPTRACK_GNTMAP_MASK 0x1f
+#define MAPTRACK_REF_SHIFT 5
+#define MAPTRACK_MAX_ENTRIES (1 << (16 - MAPTRACK_REF_SHIFT))
/* Per-domain grant information. */
typedef struct {
@@ -108,10 +109,15 @@
/* Notify 'rd' of a completed transfer via an already-locked grant entry. */
void
gnttab_notify_transfer(
- struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame);
+ struct domain *rd, struct domain *ld,
+ grant_ref_t ref, unsigned long frame);
-/* Pre-domain destruction release of granted device mappings of other domains.*/
+/* Domain death release of granted device mappings of other domains.*/
void
gnttab_release_dev_mappings(grant_table_t *gt);
+/* Extra GNTST_ values, for internal use only. */
+#define GNTST_flush_all (2) /* Success, need to flush entire TLB. */
+#define GNTST_flush_one (1) /* Success, need to flush a vaddr. */
+
#endif /* __XEN_GRANT_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/perfc_defn.h
--- a/xen/include/xen/perfc_defn.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/perfc_defn.h Thu Aug 25 22:53:20 2005
@@ -1,5 +1,6 @@
-#ifndef __XEN_PERFC_DEFN_H__
-#define __XEN_PERFC_DEFN_H__
+/* This file is legitimately included multiple times. */
+/*#ifndef __XEN_PERFC_DEFN_H__*/
+/*#define __XEN_PERFC_DEFN_H__*/
#define PERFC_MAX_PT_UPDATES 64
#define PERFC_PT_UPDATES_BUCKET_SIZE 3
@@ -124,4 +125,4 @@
PERFCOUNTER_CPU(remove_write_bad_prediction, "remove_write bad prediction")
PERFCOUNTER_CPU(update_hl2e_invlpg, "update_hl2e calls invlpg")
-#endif /* __XEN_PERFC_DEFN_H__ */
+/*#endif*/ /* __XEN_PERFC_DEFN_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/sched.h Thu Aug 25 22:53:20 2005
@@ -258,12 +258,32 @@
extern void sync_lazy_execstate_all(void);
extern int __sync_lazy_execstate(void);
-/* Called by the scheduler to switch to another vcpu. */
+/*
+ * Called by the scheduler to switch to another VCPU. On entry, although
+ * VCPUF_running is no longer asserted for @prev, its context is still running
+ * on the local CPU and is not committed to memory. The local scheduler lock
+ * is therefore still held, and interrupts are disabled, because the local CPU
+ * is in an inconsistent state.
+ *
+ * The callee must ensure that the local CPU is no longer running in @prev's
+ * context, and that the context is saved to memory, before returning.
+ * Alternatively, if implementing lazy context switching, it suffices to ensure
+ * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
+ */
extern void context_switch(
struct vcpu *prev,
struct vcpu *next);
-/* Called by the scheduler to continue running the current vcpu. */
+/*
+ * On some architectures (notably x86) it is not possible to entirely load
+ * @next's context with interrupts disabled. These may implement a function to
+ * finalise loading the new context after interrupts are re-enabled. This
+ * function is not given @prev and is not permitted to access it.
+ */
+extern void context_switch_finalise(
+ struct vcpu *next);
+
+/* Called by the scheduler to continue running the current VCPU. */
extern void continue_running(
struct vcpu *same);
@@ -297,10 +317,9 @@
(unsigned long)(_a1), (unsigned long)(_a2), (unsigned long)(_a3), \
(unsigned long)(_a4), (unsigned long)(_a5), (unsigned long)(_a6))
-#define hypercall_preempt_check() (unlikely( \
- softirq_pending(smp_processor_id()) | \
- (!!current->vcpu_info->evtchn_upcall_pending & \
- !current->vcpu_info->evtchn_upcall_mask) \
+#define hypercall_preempt_check() (unlikely( \
+ softirq_pending(smp_processor_id()) | \
+ event_pending(current) \
))
/* This domain_hash and domain_list are protected by the domlist_lock. */
@@ -386,6 +405,7 @@
void domain_unpause(struct domain *d);
void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
+void cpu_init(void);
static inline void vcpu_unblock(struct vcpu *v)
{
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/serial.h
--- a/xen/include/xen/serial.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/serial.h Thu Aug 25 22:53:20 2005
@@ -113,8 +113,9 @@
/*
* Initialisers for individual uart drivers.
*/
+/* NB. Any default value can be 0 if it is unknown and must be specified. */
struct ns16550_defaults {
- int baud; /* default baud rate; 0 == pre-configured */
+ int baud; /* default baud rate; BAUD_AUTO == pre-configured */
int data_bits; /* default data bits (5, 6, 7 or 8) */
int parity; /* default parity (n, o, e, m or s) */
int stop_bits; /* default stop bits (1 or 2) */
@@ -122,6 +123,9 @@
unsigned long io_base; /* default io_base address */
};
void ns16550_init(int index, struct ns16550_defaults *defaults);
+
+/* Baud rate was pre-configured before invoking the UART driver. */
+#define BAUD_AUTO (-1)
#endif /* __XEN_SERIAL_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/time.h
--- a/xen/include/xen/time.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/time.h Thu Aug 25 22:53:20 2005
@@ -57,7 +57,7 @@
extern void update_dom_time(struct vcpu *v);
extern void do_settime(
- unsigned long secs, unsigned long usecs, u64 system_time_base);
+ unsigned long secs, unsigned long nsecs, u64 system_time_base);
#endif /* __XEN_TIME_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/trace.h
--- a/xen/include/xen/trace.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/trace.h Thu Aug 25 22:53:20 2005
@@ -67,6 +67,15 @@
if ( (tb_event_mask & event) == 0 )
return 0;
+ /* match class */
+ if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
+ return 0;
+
+ /* then match subclass */
+ if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
+ & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
+ return 0;
+
if ( (tb_cpu_mask & (1UL << smp_processor_id())) == 0 )
return 0;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/tools/Makefile
--- a/xen/tools/Makefile Wed Aug 24 02:43:18 2005
+++ b/xen/tools/Makefile Thu Aug 25 22:53:20 2005
@@ -10,4 +10,4 @@
rm -f *.o symbols
symbols: symbols.c
- $(HOSTCC) -o $@ $<
+ $(HOSTCC) $(HOSTCFLAGS) -o $@ $<
diff -r 5f1ed597f107 -r 8799d14bef77 xen/tools/symbols.c
--- a/xen/tools/symbols.c Wed Aug 24 02:43:18 2005
+++ b/xen/tools/symbols.c Thu Aug 25 22:53:20 2005
@@ -152,8 +152,8 @@
/* include the type field in the symbol name, so that it gets
* compressed together */
s->len = strlen(str) + 1;
- s->sym = (char *) malloc(s->len + 1);
- strcpy(s->sym + 1, str);
+ s->sym = (unsigned char *) malloc(s->len + 1);
+ strcpy((char *)s->sym + 1, str);
s->sym[0] = s->type;
return 0;
@@ -197,16 +197,16 @@
* move then they may get dropped in pass 2, which breaks the
* symbols rules.
*/
- if (s->addr == _etext && strcmp(s->sym + offset, "_etext"))
+ if (s->addr == _etext && strcmp((char *)s->sym + offset, "_etext"))
return 0;
}
/* Exclude symbols which vary between passes. */
- if (strstr(s->sym + offset, "_compiled."))
+ if (strstr((char *)s->sym + offset, "_compiled."))
return 0;
for (i = 0; special_symbols[i]; i++)
- if( strcmp(s->sym + offset, special_symbols[i]) == 0 )
+ if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 )
return 0;
return 1;
@@ -311,7 +311,7 @@
off = 0;
for (i = 0; i < cnt; i++) {
- if (!table[i].flags & SYM_FLAG_VALID)
+ if (!(table[i].flags & SYM_FLAG_VALID))
continue;
if ((valid & 0xFF) == 0)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/init_task.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/init_task.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,49 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+#define swapper_pg_dir ((pgd_t *)NULL)
+struct mm_struct init_mm = INIT_MM(init_mm);
+#undef swapper_pg_dir
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union
+ __attribute__((__section__(".data.init_task"))) =
+ { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's.
+ */
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
+
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,660 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * This implementation is a fallback for platforms that do not support
+ * I/O TLBs (aka DMA address translation hardware).
+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@xxxxxxxxx>
+ * Copyright (C) 2000 Goutham Rao <goutham.rao@xxxxxxxxx>
+ * Copyright (C) 2000, 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Copyright (C) 2005 Keir Fraser <keir@xxxxxxxxxxxxx>
+ */
+
+#include <linux/cache.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <asm/io.h>
+#include <asm/pci.h>
+#include <asm/dma.h>
+
+#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))
+
+#define SG_ENT_PHYS_ADDRESS(sg) (page_to_phys((sg)->page) + (sg)->offset)
+
+/*
+ * Maximum allowable number of contiguous slabs to map,
+ * must be a power of 2. What is the appropriate value ?
+ * The complexity of {map,unmap}_single is linearly dependent on this value.
+ */
+#define IO_TLB_SEGSIZE 128
+
+/*
+ * log of the size of each IO TLB slab. The number of slabs is command line
+ * controllable.
+ */
+#define IO_TLB_SHIFT 11
+
+int swiotlb_force;
+static char *iotlb_virt_start;
+static unsigned long iotlb_nslabs;
+
+/*
+ * Used to do a quick range check in swiotlb_unmap_single and
+ * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * API.
+ */
+static dma_addr_t iotlb_bus_start, iotlb_bus_mask;
+
+/* Does the given dma address reside within the swiotlb aperture? */
+#define in_swiotlb_aperture(a) (!(((a) ^ iotlb_bus_start) & iotlb_bus_mask))
+
+/*
+ * When the IOMMU overflows we return a fallback buffer. This sets the size.
+ */
+static unsigned long io_tlb_overflow = 32*1024;
+
+void *io_tlb_overflow_buffer;
+
+/*
+ * This is a free list describing the number of free entries available from
+ * each index
+ */
+static unsigned int *io_tlb_list;
+static unsigned int io_tlb_index;
+
+/*
+ * We need to save away the original address corresponding to a mapped entry
+ * for the sync operations.
+ */
+static struct phys_addr {
+ struct page *page;
+ unsigned int offset;
+} *io_tlb_orig_addr;
+
+/*
+ * Protect the above data structures in the map and unmap calls
+ */
+static DEFINE_SPINLOCK(io_tlb_lock);
+
+/*
+ * Parse the "swiotlb=" boot option: an optional aperture size, optionally
+ * followed by ",force" (or just "force" on its own).
+ */
+static int __init
+setup_io_tlb_npages(char *str)
+{
+	/* Unlike ia64, the size is the aperture in megabytes, not 'slabs'! */
+	if (isdigit(*str)) {
+		unsigned long megabytes = simple_strtoul(str, &str, 0);
+
+		iotlb_nslabs = ALIGN(megabytes << (20 - IO_TLB_SHIFT),
+				     IO_TLB_SEGSIZE);
+		/* Round up to power of two (xen_create_contiguous_region). */
+		while ((iotlb_nslabs & (iotlb_nslabs - 1)) != 0)
+			iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs - 1);
+	}
+
+	/* Step over the separator between the size and the keyword. */
+	if (*str == ',')
+		str++;
+
+	/*
+	 * NB. 'force' enables the swiotlb, but doesn't force its use for
+	 * every DMA like it does on native Linux.
+	 */
+	if (strcmp(str, "force") == 0)
+		swiotlb_force = 1;
+
+	return 1;
+}
+__setup("swiotlb=", setup_io_tlb_npages);
+/* make io_tlb_overflow tunable too? */
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the PCI DMA API.
+ *
+ * default_size is the aperture size in bytes. It is only consulted when
+ * iotlb_nslabs has not already been set (setup_io_tlb_npages() sets it from
+ * the "swiotlb=" boot option).
+ *
+ * Panics if the low-memory aperture cannot be allocated.
+ */
+void
+swiotlb_init_with_default_size (size_t default_size)
+{
+ unsigned long i, bytes;
+
+ if (!iotlb_nslabs) {
+ iotlb_nslabs = (default_size >> IO_TLB_SHIFT);
+ iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
+ /* Round up to power of two (xen_create_contiguous_region). */
+ while (iotlb_nslabs & (iotlb_nslabs-1))
+ iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1);
+ }
+
+ /* Total aperture size: each slab is (1 << IO_TLB_SHIFT) bytes. */
+ bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT);
+
+ /*
+ * Get IO TLB memory from the low pages
+ */
+ iotlb_virt_start = alloc_bootmem_low_pages(bytes);
+ if (!iotlb_virt_start)
+ panic("Cannot allocate SWIOTLB buffer!\n"
+ "Use dom0_mem Xen boot parameter to reserve\n"
+ "some DMA memory (e.g., dom0_mem=-128M).\n");
+
+ /*
+ * NOTE(review): presumably this makes the aperture contiguous in
+ * machine memory -- confirm against xen_create_contiguous_region().
+ */
+ xen_create_contiguous_region(
+ (unsigned long)iotlb_virt_start, get_order(bytes));
+
+ /*
+ * Allocate and initialize the free list array. This array is used
+ * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE.
+ * Each entry starts out as the number of slots from its own index to
+ * the end of the IO_TLB_SEGSIZE-sized segment that contains it.
+ */
+ io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int));
+ for (i = 0; i < iotlb_nslabs; i++)
+ io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_index = 0;
+ io_tlb_orig_addr = alloc_bootmem(
+ iotlb_nslabs * sizeof(*io_tlb_orig_addr));
+
+ /*
+ * Get the overflow emergency buffer
+ */
+ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+
+ /*
+ * Start address and mask consumed by in_swiotlb_aperture(). The mask
+ * test assumes the aperture is aligned to its (power-of-two) size in
+ * bus space -- TODO confirm.
+ */
+ iotlb_bus_start = virt_to_bus(iotlb_virt_start);
+ iotlb_bus_mask = ~(dma_addr_t)(bytes - 1);
+
+ printk(KERN_INFO "Software IO TLB enabled: \n"
+ " Aperture: %lu megabytes\n"
+ " Bus range: 0x%016lx - 0x%016lx\n"
+ " Kernel range: 0x%016lx - 0x%016lx\n",
+ bytes >> 20,
+ (unsigned long)iotlb_bus_start,
+ (unsigned long)iotlb_bus_start + bytes,
+ (unsigned long)iotlb_virt_start,
+ (unsigned long)iotlb_virt_start + bytes);
+}
+
+/*
+ * Decide whether the software IO TLB is needed and, if so, set it up with
+ * the default 64MB aperture.
+ */
+void
+swiotlb_init(void)
+{
+	dom0_op_t op;
+
+	/* An explicit request from the user always turns the swiotlb on. */
+	if (swiotlb_force)
+		swiotlb = 1;
+
+	/*
+	 * Otherwise, enable for domain 0 if the machine has 'lots of memory',
+	 * which we take to mean more than 2GB.
+	 */
+	if (xen_start_info.flags & SIF_INITDOMAIN) {
+		op.cmd = DOM0_PHYSINFO;
+		if (HYPERVISOR_dom0_op(&op) == 0 &&
+		    op.u.physinfo.total_pages > 0x7ffff)
+			swiotlb = 1;
+	}
+
+	if (!swiotlb)
+		return;
+
+	swiotlb_init_with_default_size(64 * (1<<20));
+}
+
+/*
+ * Copy 'size' bytes between the original buffer described by 'buffer' and
+ * the bounce buffer at 'dma_addr'.
+ *
+ * DMA_FROM_DEVICE copies bounce buffer -> original buffer. In the lowmem
+ * branch only DMA_TO_DEVICE copies the other way and any other direction is
+ * a no-op; the highmem branch treats every direction other than
+ * DMA_FROM_DEVICE as original buffer -> bounce buffer.
+ */
+static void
+__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir)
+{
+ if (PageHighMem(buffer.page)) {
+ size_t len, bytes;
+ char *dev, *host, *kmp;
+ len = size;
+ /* Highmem pages are mapped and copied one page at a time. */
+ while (len != 0) {
+ /* Clamp this chunk so it ends at the page boundary. */
+ if (((bytes = len) + buffer.offset) > PAGE_SIZE)
+ bytes = PAGE_SIZE - buffer.offset;
+ kmp = kmap_atomic(buffer.page, KM_SWIOTLB);
+ /* (size - len) bytes have been copied so far. */
+ dev = dma_addr + size - len;
+ host = kmp + buffer.offset;
+ memcpy((dir == DMA_FROM_DEVICE) ? host : dev,
+ (dir == DMA_FROM_DEVICE) ? dev : host,
+ bytes);
+ kunmap_atomic(kmp, KM_SWIOTLB);
+ len -= bytes;
+ /* Later chunks start at offset 0 of the next page. */
+ buffer.page++;
+ buffer.offset = 0;
+ }
+ } else {
+ /* Non-highmem page: one copy through its kernel address. */
+ char *host = (char *)phys_to_virt(
+ page_to_pseudophys(buffer.page)) + buffer.offset;
+ if (dir == DMA_FROM_DEVICE)
+ memcpy(host, dma_addr, size);
+ else if (dir == DMA_TO_DEVICE)
+ memcpy(dma_addr, host, size);
+ }
+}
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ *
+ * 'buffer' describes the original memory and 'size' is the length in bytes.
+ * For DMA_TO_DEVICE and DMA_BIDIRECTIONAL the original contents are copied
+ * into the bounce buffer before returning. Returns NULL when no run of
+ * enough contiguous free slots is found.
+ */
+static void *
+map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir)
+{
+ unsigned long flags;
+ char *dma_addr;
+ unsigned int nslots, stride, index, wrap;
+ int i;
+
+ /*
+ * For mappings greater than a page, we limit the stride (and
+ * hence alignment) to a page size.
+ */
+ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ if (size > PAGE_SIZE)
+ stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+ else
+ stride = 1;
+
+ BUG_ON(!nslots);
+
+ /*
+ * Find a run of IO TLB entries large enough to fit this request and
+ * allocate a buffer from that IO TLB pool.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ {
+ /* Start at the last allocation point; 'wrap' ends the scan. */
+ wrap = index = ALIGN(io_tlb_index, stride);
+
+ if (index >= iotlb_nslabs)
+ wrap = index = 0;
+
+ do {
+ /*
+ * If we find a slot that indicates we have 'nslots'
+ * number of contiguous buffers, we allocate the
+ * buffers from that slot and mark the entries as '0'
+ * indicating unavailable.
+ */
+ if (io_tlb_list[index] >= nslots) {
+ int count = 0;
+
+ for (i = index; i < (int)(index + nslots); i++)
+ io_tlb_list[i] = 0;
+ /*
+ * Renumber the free slots just below the new
+ * allocation (within the same segment) so they
+ * count up to it: 1, 2, 3, ...
+ */
+ for (i = index - 1;
+ (OFFSET(i, IO_TLB_SEGSIZE) !=
+ IO_TLB_SEGSIZE -1) && io_tlb_list[i];
+ i--)
+ io_tlb_list[i] = ++count;
+ dma_addr = iotlb_virt_start +
+ (index << IO_TLB_SHIFT);
+
+ /*
+ * Update the indices to avoid searching in
+ * the next round.
+ */
+ io_tlb_index =
+ ((index + nslots) < iotlb_nslabs
+ ? (index + nslots) : 0);
+
+ goto found;
+ }
+ index += stride;
+ if (index >= iotlb_nslabs)
+ index = 0;
+ } while (index != wrap);
+
+ /* Scanned the whole table without finding a large enough run. */
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+ return NULL;
+ }
+ found:
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+ /*
+ * Save away the mapping from the original address to the DMA address.
+ * This is needed when we sync the memory. Then we sync the buffer if
+ * needed. Only the first slot of the allocation records the original
+ * buffer.
+ */
+ io_tlb_orig_addr[index] = buffer;
+ if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL))
+ __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
+
+ return dma_addr;
+}
+
+/*
+ * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ *
+ * For DMA_FROM_DEVICE and DMA_BIDIRECTIONAL the bounce buffer contents are
+ * copied back to the original buffer first; the slots are then returned to
+ * the free list.
+ */
+static void
+unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+{
+ unsigned long flags;
+ int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT;
+ struct phys_addr buffer = io_tlb_orig_addr[index];
+
+ /*
+ * First, sync the memory before unmapping the entry
+ */
+ if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
+ __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
+
+ /*
+ * Return the buffer to the free list by setting the corresponding
+ * entries to indicate the number of contiguous entries available.
+ * While returning the entries to the free list, we merge the entries
+ * with slots below and above the pool being returned.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ {
+ /*
+ * Free count of the slot just past this allocation, or 0 if
+ * the allocation ends at its segment boundary.
+ */
+ count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
+ io_tlb_list[index + nslots] : 0);
+ /*
+ * Step 1: return the slots to the free list, merging the
+ * slots with succeeding slots
+ */
+ for (i = index + nslots - 1; i >= index; i--)
+ io_tlb_list[i] = ++count;
+ /*
+ * Step 2: merge the returned slots with the preceding slots,
+ * if available (non zero)
+ */
+ for (i = index - 1;
+ (OFFSET(i, IO_TLB_SEGSIZE) !=
+ IO_TLB_SEGSIZE -1) && io_tlb_list[i];
+ i--)
+ io_tlb_list[i] = ++count;
+ }
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+}
+
+/*
+ * Re-synchronise a live bounce buffer (kernel virtual address dma_addr) with
+ * the original buffer recorded for it, in the single direction 'dir'.
+ */
+static void
+sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+{
+	int slot = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT;
+	struct phys_addr orig = io_tlb_orig_addr[slot];
+
+	/* Only the two one-way directions are accepted here. */
+	BUG_ON(!((dir == DMA_FROM_DEVICE) || (dir == DMA_TO_DEVICE)));
+
+	__sync_single(orig, dma_addr, size, dir);
+}
+
+/*
+ * Report that the bounce-buffer pool could not satisfy a request.
+ *
+ * Running out of IOMMU space is very bad: drivers cannot handle it properly
+ * unless they check for pci_dma_mapping_error (most don't). The failure is
+ * always logged. When do_panic is set and the request is larger than the
+ * static overflow buffer, we panic instead of letting the caller fall back
+ * to that buffer.
+ */
+static void
+swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+{
+	const char *bus_id = dev ? dev->bus_id : "?";
+
+	printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
+	       "device %s\n", (unsigned long)size, bus_id);
+
+	/* Small enough for the overflow buffer, or caller handles errors. */
+	if (!do_panic || size <= io_tlb_overflow)
+		return;
+
+	if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+		panic("PCI-DMA: Memory would be corrupted\n");
+	if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+		panic("PCI-DMA: Random memory would be DMAed\n");
+}
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode. The
+ * PCI address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory until
+ * either swiotlb_unmap_single or swiotlb_sync_single_for_cpu is performed.
+ */
+dma_addr_t
+swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
+{
+	dma_addr_t bus_addr = virt_to_bus(ptr);
+	struct phys_addr orig;
+	void *bounce;
+
+	BUG_ON(dir == DMA_NONE);
+
+	/*
+	 * Fast path: if the buffer does not straddle a page boundary and
+	 * already lies in the device's DMA window, hand back its own bus
+	 * address without bounce buffering.
+	 */
+	if (!(range_straddles_page_boundary(ptr, size) ||
+	      address_needs_mapping(hwdev, bus_addr)))
+		return bus_addr;
+
+	/* Slow path: allocate a bounce buffer for the original memory. */
+	orig.page = virt_to_page(ptr);
+	orig.offset = (unsigned long)ptr & ~PAGE_MASK;
+	bounce = map_single(hwdev, orig, size, dir);
+	if (bounce == NULL) {
+		/* Pool exhausted: fall back to the static overflow buffer. */
+		swiotlb_full(hwdev, size, dir, 1);
+		bounce = io_tlb_overflow_buffer;
+	}
+
+	/* The address handed to the device must itself be DMA'ble. */
+	bus_addr = virt_to_bus(bounce);
+	if (address_needs_mapping(hwdev, bus_addr))
+		panic("map_single: bounce buffer is not DMA'ble");
+
+	return bus_addr;
+}
+
+/*
+ * Unmap a single streaming mode DMA translation. The dma_addr and size must
+ * match what was provided for in a previous swiotlb_map_single call. All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+void
+swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
+		     int dir)
+{
+	BUG_ON(dir == DMA_NONE);
+
+	/* Addresses outside the aperture were never bounced: nothing to do. */
+	if (!in_swiotlb_aperture(dev_addr))
+		return;
+
+	unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
+}
+
+/*
+ * Make physical memory consistent for a single streaming mode DMA translation
+ * after a transfer.
+ *
+ * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * using the cpu, yet do not wish to teardown the PCI dma mapping, you must
+ * call this function before doing so. At the next point you give the PCI dma
+ * address back to the card, you must first perform a
+ * swiotlb_sync_single_for_device, and then the device again owns the buffer.
+ */
+void
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+			    size_t size, int dir)
+{
+	BUG_ON(dir == DMA_NONE);
+
+	/* Only addresses inside the aperture have a bounce buffer to sync. */
+	if (!in_swiotlb_aperture(dev_addr))
+		return;
+
+	sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
+}
+
+/*
+ * Counterpart of swiotlb_sync_single_for_cpu: sync the bounce buffer, if
+ * any, before the device is given the dma address again.
+ */
+void
+swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
+			       size_t size, int dir)
+{
+	BUG_ON(dir == DMA_NONE);
+
+	/* Only addresses inside the aperture have a bounce buffer to sync. */
+	if (!in_swiotlb_aperture(dev_addr))
+		return;
+
+	sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
+}
+
+/*
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the above swiotlb_map_single
+ * interface. Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length. They are obtained via
+ * sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ *       DMA address/length pairs than there are SG table elements.
+ *       (for example via virtual mapping capabilities)
+ *       The routine returns the number of addr/length pairs actually
+ *       used, at most nents.
+ *
+ * This implementation returns nelems on success. If a bounce buffer cannot
+ * be allocated, the entries mapped so far are unmapped, the failing entry's
+ * dma_length is set to 0, and 0 is returned.
+ *
+ * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * same here.
+ */
+int
+swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+	       int dir)
+{
+	struct phys_addr orig;
+	dma_addr_t bus_addr;
+	char *bounce;
+	int n;
+
+	BUG_ON(dir == DMA_NONE);
+
+	for (n = 0; n < nelems; n++) {
+		struct scatterlist *ent = &sg[n];
+
+		bus_addr = SG_ENT_PHYS_ADDRESS(ent);
+
+		/* Directly reachable by the device: no bounce buffer. */
+		if (!address_needs_mapping(hwdev, bus_addr)) {
+			ent->dma_address = bus_addr;
+			ent->dma_length = ent->length;
+			continue;
+		}
+
+		orig.page = ent->page;
+		orig.offset = ent->offset;
+		bounce = map_single(hwdev, orig, ent->length, dir);
+		if (bounce == NULL) {
+			/* Don't panic here, we expect map_sg users
+			   to do proper error handling. */
+			swiotlb_full(hwdev, ent->length, dir, 0);
+			/* Undo the n entries that were already mapped. */
+			swiotlb_unmap_sg(hwdev, sg, n, dir);
+			ent->dma_length = 0;
+			return 0;
+		}
+
+		ent->dma_address = (dma_addr_t)virt_to_bus(bounce);
+		ent->dma_length = ent->length;
+	}
+
+	return nelems;
+}
+
+/*
+ * Unmap a set of streaming mode DMA translations. Again, cpu read rules
+ * concerning calls here are the same as for swiotlb_unmap_single() above.
+ */
+void
+swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+		 int dir)
+{
+	int n;
+
+	BUG_ON(dir == DMA_NONE);
+
+	for (n = 0; n < nelems; n++) {
+		struct scatterlist *ent = &sg[n];
+
+		/* An unchanged address means the entry was never bounced. */
+		if (ent->dma_address == SG_ENT_PHYS_ADDRESS(ent))
+			continue;
+
+		unmap_single(hwdev,
+			     (void *)bus_to_virt(ent->dma_address),
+			     ent->dma_length, dir);
+	}
+}
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA translations
+ * after a transfer.
+ *
+ * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
+ * and usage.
+ */
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+			int nelems, int dir)
+{
+	int n;
+
+	BUG_ON(dir == DMA_NONE);
+
+	for (n = 0; n < nelems; n++) {
+		struct scatterlist *ent = &sg[n];
+
+		/* An unchanged address means the entry was never bounced. */
+		if (ent->dma_address == SG_ENT_PHYS_ADDRESS(ent))
+			continue;
+
+		sync_single(hwdev,
+			    (void *)bus_to_virt(ent->dma_address),
+			    ent->dma_length, dir);
+	}
+}
+
+/*
+ * Scatter-gather counterpart of swiotlb_sync_single_for_device: sync every
+ * bounced entry of the list.
+ */
+void
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+			   int nelems, int dir)
+{
+	int n;
+
+	BUG_ON(dir == DMA_NONE);
+
+	for (n = 0; n < nelems; n++) {
+		struct scatterlist *ent = &sg[n];
+
+		/* An unchanged address means the entry was never bounced. */
+		if (ent->dma_address == SG_ENT_PHYS_ADDRESS(ent))
+			continue;
+
+		sync_single(hwdev,
+			    (void *)bus_to_virt(ent->dma_address),
+			    ent->dma_length, dir);
+	}
+}
+
+/*
+ * Map 'size' bytes starting at 'offset' within 'page' for streaming DMA and
+ * return the bus address to give to the device.
+ *
+ * If the device can reach the page directly, its own address is returned.
+ * Otherwise a bounce buffer is allocated; if the pool is exhausted the
+ * static overflow buffer is used (swiotlb_full() may panic first), which
+ * callers can detect with swiotlb_dma_mapping_error().
+ */
+dma_addr_t
+swiotlb_map_page(struct device *hwdev, struct page *page,
+		 unsigned long offset, size_t size,
+		 enum dma_data_direction direction)
+{
+	struct phys_addr buffer;
+	dma_addr_t dev_addr;
+	char *map;
+
+	/*
+	 * Reject DMA_NONE up front, as swiotlb_unmap_page() and every other
+	 * map/unmap/sync entry point in this file does.
+	 */
+	BUG_ON(direction == DMA_NONE);
+
+	dev_addr = page_to_phys(page) + offset;
+	if (address_needs_mapping(hwdev, dev_addr)) {
+		buffer.page = page;
+		buffer.offset = offset;
+		map = map_single(hwdev, buffer, size, direction);
+		if (!map) {
+			/* Pool exhausted: fall back to the overflow buffer. */
+			swiotlb_full(hwdev, size, direction, 1);
+			map = io_tlb_overflow_buffer;
+		}
+		dev_addr = (dma_addr_t)virt_to_bus(map);
+	}
+
+	return dev_addr;
+}
+
+/*
+ * Undo a swiotlb_map_page() mapping; only addresses inside the swiotlb
+ * aperture have a bounce buffer to release.
+ */
+void
+swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
+		   size_t size, enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	if (!in_swiotlb_aperture(dma_address))
+		return;
+
+	unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
+}
+
+/*
+ * A mapping failed if it was redirected to the overflow buffer: return
+ * non-zero when dma_addr is that buffer's bus address.
+ */
+int
+swiotlb_dma_mapping_error(dma_addr_t dma_addr)
+{
+	dma_addr_t overflow_bus = virt_to_bus(io_tlb_overflow_buffer);
+
+	return dma_addr == overflow_bus;
+}
+
+/*
+ * Return whether the given PCI device DMA address mask can be supported
+ * properly. For example, if your device can only drive the low 24-bits
+ * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
+ * this function.
+ *
+ * Here a mask is accepted only if it is at least 0xffffffff.
+ */
+int
+swiotlb_dma_supported (struct device *hwdev, u64 mask)
+{
+	if (mask >= 0xffffffffUL)
+		return 1;
+
+	return 0;
+}
+
+EXPORT_SYMBOL(swiotlb_init);
+EXPORT_SYMBOL(swiotlb_map_single);
+EXPORT_SYMBOL(swiotlb_unmap_single);
+EXPORT_SYMBOL(swiotlb_map_sg);
+EXPORT_SYMBOL(swiotlb_unmap_sg);
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
+EXPORT_SYMBOL(swiotlb_map_page);
+EXPORT_SYMBOL(swiotlb_unmap_page);
+EXPORT_SYMBOL(swiotlb_dma_mapping_error);
+EXPORT_SYMBOL(swiotlb_dma_supported);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+
+#if defined(CONFIG_ACPI_BUS)
+#include <acpi/acpi_bus.h>
+#endif
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+extern struct genapic apic_cluster;
+extern struct genapic apic_flat;
+
+#ifndef CONFIG_XEN
+struct genapic *genapic = &apic_flat;
+#else
+extern struct genapic apic_xen;
+struct genapic *genapic = &apic_xen;
+#endif
+
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ *
+ * With CONFIG_XEN the whole probe is compiled out and genapic is simply
+ * pointed at apic_xen.  In every case the name of the selected driver is
+ * logged before returning.
+ */
+void __init clustered_apic_check(void)
+{
+#ifndef CONFIG_XEN
+ long i;
+ u8 clusters, max_cluster;
+ u8 id;
+ u8 cluster_cnt[NUM_APIC_CLUSTERS];
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ /* AMD always uses flat mode right now */
+ genapic = &apic_flat;
+ goto print;
+ }
+
+#if defined(CONFIG_ACPI_BUS)
+ /*
+ * Some x86_64 machines use physical APIC mode regardless of how many
+ * procs/clusters are present (x86_64 ES7000 is an example).
+ * NOTE(review): the driver selected for this case is apic_cluster --
+ * confirm that it is the physical-mode implementation.
+ */
+ if (acpi_fadt.revision > FADT2_REVISION_ID)
+ if (acpi_fadt.force_apic_physical_destination_mode) {
+ genapic = &apic_cluster;
+ goto print;
+ }
+#endif
+
+ memset(cluster_cnt, 0, sizeof(cluster_cnt));
+
+ /* Tally how many valid APIC IDs fall into each cluster. */
+ for (i = 0; i < NR_CPUS; i++) {
+ id = bios_cpu_apicid[i];
+ if (id != BAD_APICID)
+ cluster_cnt[APIC_CLUSTERID(id)]++;
+ }
+
+ /* Count the populated clusters and find the size of the largest. */
+ clusters = 0;
+ max_cluster = 0;
+ for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+ if (cluster_cnt[i] > 0) {
+ ++clusters;
+ if (cluster_cnt[i] > max_cluster)
+ max_cluster = cluster_cnt[i];
+ }
+ }
+
+ /*
+ * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
+ * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
+ * else physical mode.
+ * (We don't use lowest priority delivery + HW APIC IRQ steering, so
+ * can ignore the clustered logical case and go straight to physical.)
+ */
+ if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
+ genapic = &apic_flat;
+ else
+ genapic = &apic_cluster;
+
+print:
+#else
+ /* hardcode to xen apic functions */
+ genapic = &apic_xen;
+#endif
+ printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+}
+
+/* Same for both flat and clustered. */
+
+#ifdef CONFIG_XEN
+/* Xen variant of the IPI shortcut sender, used by send_IPI_self() below. */
+extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector,
+				  unsigned int dest);
+#endif
+
+/*
+ * Send IPI `vector' to the calling CPU only: through the native APIC
+ * shortcut normally, or through xen_send_IPI_shortcut() when built with
+ * CONFIG_XEN.
+ */
+void send_IPI_self(int vector)
+{
+#ifndef CONFIG_XEN
+	__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+#else
+	xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+#endif
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Xen APIC subarch code. Maximum 8 CPUs, logical delivery.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ *
+ * Hacked to pieces for Xen by Chris Wright.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#else
+#include <asm/apic.h>
+#include <asm/apicdef.h>
+#include <asm/genapic.h>
+#endif
+#include <asm-xen/evtchn.h>
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+/*
+ * Deliver IPI `vector' to `cpu' by notifying the event channel recorded for
+ * that (cpu, vector) pair in the per-CPU ipi_to_evtchn table.  A zero entry
+ * is treated as unbound: nothing is sent and a message is logged instead.
+ */
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+	unsigned int evtchn;
+	Dprintk("%s\n", __FUNCTION__);
+
+	evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+	if (evtchn)
+		notify_via_evtchn(evtchn);
+	else
+		/* newline-terminated so it does not run into the next message */
+		printk("send_IPI to unbound port %d/%d\n", cpu, vector);
+}
+
+/*
+ * Xen counterpart of the APIC destination shorthands: fan `vector' out via
+ * __send_IPI_one() to the CPUs selected by `shortcut'.
+ *
+ *   APIC_DEST_SELF   - the calling CPU only
+ *   APIC_DEST_ALLBUT - every online CPU except the caller
+ *   APIC_DEST_ALLINC - every online CPU, caller included
+ *
+ * Any other shortcut value is logged and otherwise ignored.  `dest' is not
+ * used.
+ */
+void xen_send_IPI_shortcut(unsigned int shortcut, int vector,
+			   unsigned int dest)
+{
+	int cpu;
+
+	switch (shortcut) {
+	case APIC_DEST_SELF:
+		__send_IPI_one(smp_processor_id(), vector);
+		break;
+	case APIC_DEST_ALLBUT:
+		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+			if (cpu == smp_processor_id())
+				continue;
+			if (cpu_isset(cpu, cpu_online_map)) {
+				__send_IPI_one(cpu, vector);
+			}
+		}
+		break;
+	case APIC_DEST_ALLINC:
+		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+			if (cpu_isset(cpu, cpu_online_map)) {
+				__send_IPI_one(cpu, vector);
+			}
+		}
+		break;
+	default:
+		printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+		       vector);
+		break;
+	}
+}
+
+/* genapic target_cpus hook: interrupts may target any online CPU. */
+static cpumask_t xen_target_cpus(void)
+{
+ return cpu_online_map;
+}
+
+/*
+ * Hook for setting up the logical destination ID.
+ * Deliberately does nothing beyond the debug trace; not called at present.
+ */
+static void xen_init_apic_ldr(void)
+{
+	Dprintk("%s\n", __FUNCTION__);
+}
+
+/*
+ * Send `vector' to every online CPU except the caller.  Nothing is sent
+ * when the caller is the only online CPU.
+ */
+static void xen_send_IPI_allbutself(int vector)
+{
+	/*
+	 * if there are no other CPUs in the system then
+	 * we get an APIC send error if we try to broadcast.
+	 * thus we have to avoid sending IPIs in this case.
+	 */
+	Dprintk("%s\n", __FUNCTION__);
+	if (num_online_cpus() > 1)
+		xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector,
+				      APIC_DEST_LOGICAL);
+}
+
+/* Send `vector' to every online CPU, including the caller. */
+static void xen_send_IPI_all(int vector)
+{
+ Dprintk("%s\n", __FUNCTION__);
+ xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+}
+
+/*
+ * Send `vector' to each CPU set in `cpumask', keeping local interrupts
+ * disabled while the IPIs are issued.  Warns when the first word of the
+ * mask names a CPU that is not in cpu_online_map.
+ */
+static void xen_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+	unsigned long requested = cpus_addr(cpumask)[0];
+	unsigned long irq_flags;
+	unsigned int target;
+
+	Dprintk("%s\n", __FUNCTION__);
+	local_irq_save(irq_flags);
+	WARN_ON(requested & ~cpus_addr(cpu_online_map)[0]);
+
+	for (target = 0; target < NR_CPUS; ++target) {
+		if (!cpu_isset(target, cpumask))
+			continue;
+		__send_IPI_one(target, vector);
+	}
+	local_irq_restore(irq_flags);
+}
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+/*
+ * Privileged-guest only: report whether the calling CPU's number is set in
+ * phys_cpu_present_map.
+ */
+static int xen_apic_id_registered(void)
+{
+ /* better be set */
+ Dprintk("%s\n", __FUNCTION__);
+ return physid_isset(smp_processor_id(), phys_cpu_present_map);
+}
+#endif
+
+/*
+ * Convert a CPU mask to an APIC destination value: the first word of the
+ * mask, restricted to the APIC_ALL_CPUS bits.
+ */
+static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ Dprintk("%s\n", __FUNCTION__);
+ return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+/*
+ * Compute the physical package id: take bits 31..24 of the EBX value
+ * returned by CPUID leaf 1 and shift them right by `index_msb'.
+ */
+static unsigned int phys_pkg_id(int index_msb)
+{
+	u32 top_byte;
+
+	Dprintk("%s\n", __FUNCTION__);
+	top_byte = (cpuid_ebx(1) >> 24) & 0xFF;
+	return top_byte >> index_msb;
+}
+
+/*
+ * genapic operations table for Xen guests.  The delivery-mode field and
+ * the apic_id_registered hook are filled in only for privileged guests
+ * (CONFIG_XEN_PRIVILEGED_GUEST).
+ */
+struct genapic apic_xen = {
+ .name = "xen",
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ .int_delivery_mode = dest_LowestPrio,
+#endif
+ .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+ .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
+ .target_cpus = xen_target_cpus,
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ .apic_id_registered = xen_apic_id_registered,
+#endif
+ .init_apic_ldr = xen_init_apic_ldr,
+ .send_IPI_all = xen_send_IPI_all,
+ .send_IPI_allbutself = xen_send_IPI_allbutself,
+ .send_IPI_mask = xen_send_IPI_mask,
+ .cpu_mask_to_apicid = xen_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+};
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,280 @@
+/* Xenbus code for blkif backend
+ Copyright (C) 2005 Rusty Russell <rusty@xxxxxxxxxxxxxxx>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <stdarg.h>
+#include <linux/module.h>
+#include <asm-xen/xenbus.h>
+#include "common.h"
+
+/* Per-device state of one blkif backend; stored in xenbus_device->data. */
+struct backend_info
+{
+ struct xenbus_device *dev;
+
+ /* our communications channel */
+ blkif_t *blkif;
+
+ /*
+ * Values taken from our own xenbus node: "frontend-id",
+ * "physical-device", and whether a "read-only" entry exists.
+ */
+ long int frontend_id;
+ long int pdev;
+ long int readonly;
+
+ /* watch back end for changes */
+ struct xenbus_watch backend_watch;
+
+ /* watch front end for changes */
+ struct xenbus_watch watch;
+ /* xenbus path of the frontend; kfree()d together with this structure */
+ char *frontpath;
+};
+
+/*
+ * Driver remove hook: drop both xenbus watches, release the block
+ * interface if one was created, and free the per-device state.
+ * Always returns 0.
+ */
+static int blkback_remove(struct xenbus_device *dev)
+{
+	struct backend_info *info = dev->data;
+
+	/* the frontend watch only has a node once it has been set up */
+	if (info->watch.node != NULL)
+		unregister_xenbus_watch(&info->watch);
+	unregister_xenbus_watch(&info->backend_watch);
+
+	if (info->blkif != NULL)
+		blkif_put(info->blkif);
+
+	/* kfree() accepts NULL, so frontpath needs no guard */
+	kfree(info->frontpath);
+	kfree(info);
+	return 0;
+}
+
+/*
+ * Front end tells us frame.
+ *
+ * Watch callback for the frontend path.  When the frontend node has gone
+ * away the backend removes its own node and unregisters the device.
+ * Otherwise, once a block interface exists and is not yet CONNECTED, it
+ * reads "ring-ref" and "event-channel" from the frontend, publishes
+ * "sectors", "info" and "sector-size" under the backend node inside one
+ * transaction, and maps the shared ring.  Any failure after the transaction
+ * has started goes through the abort label.
+ *
+ * A NULL `node' (as passed by backend_changed()) skips the existence test.
+ */
+static void frontend_changed(struct xenbus_watch *watch, const char *node)
+{
+ unsigned long ring_ref;
+ unsigned int evtchn;
+ int err;
+ struct backend_info *be
+ = container_of(watch, struct backend_info, watch);
+
+ /* If other end is gone, delete ourself. */
+ if (node && !xenbus_exists(be->frontpath, "")) {
+ xenbus_rm(be->dev->nodename, "");
+ device_unregister(&be->dev->dev);
+ return;
+ }
+ /* Nothing to do until the interface exists, or once it is connected. */
+ if (be->blkif == NULL || be->blkif->status == CONNECTED)
+ return;
+
+ err = xenbus_gather(be->frontpath, "ring-ref", "%lu", &ring_ref,
+ "event-channel", "%u", &evtchn, NULL);
+ if (err) {
+ xenbus_dev_error(be->dev, err,
+ "reading %s/ring-ref and event-channel",
+ be->frontpath);
+ return;
+ }
+
+ /* Supply the information about the device the frontend needs */
+ err = xenbus_transaction_start(be->dev->nodename);
+ if (err) {
+ xenbus_dev_error(be->dev, err, "starting transaction");
+ return;
+ }
+
+ err = xenbus_printf(be->dev->nodename, "sectors", "%lu",
+ vbd_size(&be->blkif->vbd));
+ if (err) {
+ xenbus_dev_error(be->dev, err, "writing %s/sectors",
+ be->dev->nodename);
+ goto abort;
+ }
+
+ /* FIXME: use a typename instead */
+ err = xenbus_printf(be->dev->nodename, "info", "%u",
+ vbd_info(&be->blkif->vbd));
+ if (err) {
+ xenbus_dev_error(be->dev, err, "writing %s/info",
+ be->dev->nodename);
+ goto abort;
+ }
+ err = xenbus_printf(be->dev->nodename, "sector-size", "%lu",
+ vbd_secsize(&be->blkif->vbd));
+ if (err) {
+ xenbus_dev_error(be->dev, err, "writing %s/sector-size",
+ be->dev->nodename);
+ goto abort;
+ }
+
+ /* Map the shared frame, irq etc. */
+ err = blkif_map(be->blkif, ring_ref, evtchn);
+ if (err) {
+ xenbus_dev_error(be->dev, err, "mapping ring-ref %lu port %u",
+ ring_ref, evtchn);
+ goto abort;
+ }
+
+ /* Success path ends the transaction with 0 (presumably "commit"). */
+ xenbus_transaction_end(0);
+ xenbus_dev_ok(be->dev);
+
+ return;
+
+abort:
+ /* Failure path ends the transaction with 1 (presumably "abort"). */
+ xenbus_transaction_end(1);
+}
+
+/*
+ * Setup supplies physical device.
+ * We provide event channel and device details to front end.
+ * Frontend supplies shared frame and event channel.
+ *
+ * Watch callback for our own backend node.  Reads "physical-device" (and
+ * notes whether a "read-only" entry exists); returns quietly while
+ * "physical-device" does not exist yet, and refuses to change a device
+ * number that was already recorded.  The first time a device number is
+ * available, the block interface and its vbd are created and
+ * frontend_changed() is called directly to attempt the connection.
+ */
+static void backend_changed(struct xenbus_watch *watch, const char *node)
+{
+	int err;
+	char *p;
+	long int handle, pdev;
+	struct backend_info *be
+		= container_of(watch, struct backend_info, backend_watch);
+	struct xenbus_device *dev = be->dev;
+
+	err = xenbus_scanf(dev->nodename, "physical-device", "%li", &pdev);
+	if (XENBUS_EXIST_ERR(err))
+		return;
+	if (err < 0) {
+		xenbus_dev_error(dev, err, "reading physical-device");
+		return;
+	}
+	if (be->pdev && be->pdev != pdev) {
+		printk(KERN_WARNING
+		       "changing physical-device not supported\n");
+		return;
+	}
+	be->pdev = pdev;
+
+	/* If there's a read-only node, we're read only. */
+	p = xenbus_read(dev->nodename, "read-only", NULL);
+	if (!IS_ERR(p)) {
+		be->readonly = 1;
+		kfree(p);
+	}
+
+	if (be->blkif == NULL) {
+		/*
+		 * Front end dir is a number, which is used as the handle.
+		 * strrchr() returns NULL when the path contains no '/', so
+		 * check before stepping past the separator.
+		 */
+		p = strrchr(be->frontpath, '/');
+		if (p == NULL) {
+			xenbus_dev_error(dev, -EINVAL,
+					 "parsing frontend path %s",
+					 be->frontpath);
+			return;
+		}
+		handle = simple_strtoul(p + 1, NULL, 0);
+
+		be->blkif = alloc_blkif(be->frontend_id);
+		if (IS_ERR(be->blkif)) {
+			err = PTR_ERR(be->blkif);
+			be->blkif = NULL;
+			xenbus_dev_error(dev, err, "creating block interface");
+			return;
+		}
+
+		err = vbd_create(be->blkif, handle, be->pdev, be->readonly);
+		if (err) {
+			/*
+			 * NOTE(review): be->blkif stays allocated and non-NULL
+			 * on this path, so the setup is not retried by a later
+			 * watch event -- confirm this is intended.
+			 */
+			xenbus_dev_error(dev, err, "creating vbd structure");
+			return;
+		}
+
+		/* Pass in NULL node to skip exist test. */
+		frontend_changed(&be->watch, NULL);
+	}
+}
+
+/*
+ * Driver probe hook: allocate the per-device state, read "frontend-id" and
+ * "frontend" from the backend node, register watches on the backend node
+ * and on the frontend path, then run backend_changed() once by hand.
+ *
+ * Returns 0 on success.  On failure everything set up so far is released
+ * through the free_be label and an error code is returned (-ENOMEM,
+ * -ENOENT, or whatever the failing xenbus call reported).
+ */
+static int blkback_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ struct backend_info *be;
+ char *frontend;
+ int err;
+
+ be = kmalloc(sizeof(*be), GFP_KERNEL);
+ if (!be) {
+ xenbus_dev_error(dev, -ENOMEM, "allocating backend structure");
+ return -ENOMEM;
+ }
+ /* Zeroed so that unset pointers and watch nodes read as NULL below. */
+ memset(be, 0, sizeof(*be));
+
+ frontend = NULL;
+ err = xenbus_gather(dev->nodename,
+ "frontend-id", "%li", &be->frontend_id,
+ "frontend", NULL, &frontend,
+ NULL);
+ /* Missing entries are not reported as a device error, just bail out. */
+ if (XENBUS_EXIST_ERR(err))
+ goto free_be;
+ if (err < 0) {
+ xenbus_dev_error(dev, err,
+ "reading %s/frontend or frontend-id",
+ dev->nodename);
+ goto free_be;
+ }
+ if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+ /* If we can't get a frontend path and a frontend-id,
+ * then our bus-id is no longer valid and we need to
+ * destroy the backend device.
+ */
+ err = -ENOENT;
+ goto free_be;
+ }
+
+ be->dev = dev;
+ be->backend_watch.node = dev->nodename;
+ be->backend_watch.callback = backend_changed;
+ err = register_xenbus_watch(&be->backend_watch);
+ if (err) {
+ /* Cleared so free_be does not unregister a watch that never registered. */
+ be->backend_watch.node = NULL;
+ xenbus_dev_error(dev, err, "adding backend watch on %s",
+ dev->nodename);
+ goto free_be;
+ }
+
+ be->frontpath = frontend;
+ be->watch.node = be->frontpath;
+ be->watch.callback = frontend_changed;
+ err = register_xenbus_watch(&be->watch);
+ if (err) {
+ be->watch.node = NULL;
+ xenbus_dev_error(dev, err,
+ "adding frontend watch on %s",
+ be->frontpath);
+ goto free_be;
+ }
+
+ dev->data = be;
+
+ /* Kick the state machine once with whatever is already in the store. */
+ backend_changed(&be->backend_watch, dev->nodename);
+ return 0;
+
+ free_be:
+ /* be->frontpath, if set, aliases `frontend', so it is freed only once. */
+ if (be->backend_watch.node)
+ unregister_xenbus_watch(&be->backend_watch);
+ if (frontend)
+ kfree(frontend);
+ kfree(be);
+ return err;
+}
+
+/* Device types handled by this driver; the empty entry presumably ends the table. */
+static struct xenbus_device_id blkback_ids[] = {
+ { "vbd" },
+ { "" }
+};
+
+/* xenbus driver description for the "vbd" backend. */
+static struct xenbus_driver blkback = {
+ .name = "vbd",
+ .owner = THIS_MODULE,
+ .ids = blkback_ids,
+ .probe = blkback_probe,
+ .remove = blkback_remove,
+};
+
+/* Register the blkif backend with xenbus; the result is not checked. */
+void blkif_xenbus_init(void)
+{
+ xenbus_register_backend(&blkback);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,257 @@
+/* Xenbus code for netif backend
+ Copyright (C) 2005 Rusty Russell <rusty@xxxxxxxxxxxxxxx>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <stdarg.h>
+#include <linux/module.h>
+#include <asm-xen/xenbus.h>
+#include "common.h"
+
+/* Per-device state of one netif backend; stored in xenbus_device->data. */
+struct backend_info
+{
+ struct xenbus_device *dev;
+
+ /* our communications channel */
+ netif_t *netif;
+
+ /* value of "frontend-id" read from our own xenbus node */
+ long int frontend_id;
+#if 0
+ long int pdev;
+ long int readonly;
+#endif
+
+ /* watch back end for changes */
+ struct xenbus_watch backend_watch;
+
+ /* watch front end for changes */
+ struct xenbus_watch watch;
+ /* xenbus path of the frontend; kfree()d together with this structure */
+ char *frontpath;
+};
+
+/*
+ * Driver remove hook: drop both xenbus watches, disconnect the network
+ * interface if one was created, and free the per-device state.
+ * Always returns 0.
+ */
+static int netback_remove(struct xenbus_device *dev)
+{
+	struct backend_info *info = dev->data;
+
+	/* the frontend watch only has a node once it has been set up */
+	if (info->watch.node != NULL)
+		unregister_xenbus_watch(&info->watch);
+	unregister_xenbus_watch(&info->backend_watch);
+
+	if (info->netif != NULL)
+		netif_disconnect(info->netif);
+
+	/* kfree() accepts NULL, so frontpath needs no guard */
+	kfree(info->frontpath);
+	kfree(info);
+	return 0;
+}
+
+/*
+ * Front end tells us frame.
+ *
+ * Watch callback for the frontend path.  When the frontend node has gone
+ * away the backend removes its own node and unregisters the device.
+ * Otherwise, once a network interface exists and is not yet CONNECTED, it
+ * parses the frontend's "mac" entry (colon-separated hex bytes) into
+ * fe_dev_addr, reads the two ring references and the event channel, and
+ * maps the shared rings.
+ *
+ * A NULL `node' (as passed by backend_changed()) skips the existence test.
+ */
+static void frontend_changed(struct xenbus_watch *watch, const char *node)
+{
+	unsigned long tx_ring_ref, rx_ring_ref;
+	unsigned int evtchn;
+	int err;
+	struct backend_info *be
+		= container_of(watch, struct backend_info, watch);
+	char *mac, *e, *s;
+	int i;
+
+	/* If other end is gone, delete ourself. */
+	if (node && !xenbus_exists(be->frontpath, "")) {
+		xenbus_rm(be->dev->nodename, "");
+		device_unregister(&be->dev->dev);
+		return;
+	}
+	if (be->netif == NULL || be->netif->status == CONNECTED)
+		return;
+
+	/* "mac" lives under the frontend path, so report that path on error. */
+	mac = xenbus_read(be->frontpath, "mac", NULL);
+	if (IS_ERR(mac)) {
+		err = PTR_ERR(mac);
+		xenbus_dev_error(be->dev, err, "reading %s/mac",
+				 be->frontpath);
+		return;
+	}
+	s = mac;
+	for (i = 0; i < ETH_ALEN; i++) {
+		be->netif->fe_dev_addr[i] = simple_strtoul(s, &e, 16);
+		/*
+		 * Each byte needs at least one digit.  A NUL terminator is
+		 * only acceptable after the last byte: accepting it earlier
+		 * would make the next iteration parse memory beyond the end
+		 * of the string.
+		 */
+		if (s == e ||
+		    (e[0] != ':' && (e[0] != 0 || i != ETH_ALEN - 1))) {
+			kfree(mac);
+			err = -ENOENT;
+			xenbus_dev_error(be->dev, err, "parsing %s/mac",
+					 be->frontpath);
+			return;
+		}
+		s = &e[1];
+	}
+	kfree(mac);
+
+	err = xenbus_gather(be->frontpath, "tx-ring-ref", "%lu", &tx_ring_ref,
+			    "rx-ring-ref", "%lu", &rx_ring_ref,
+			    "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_error(be->dev, err,
+				 "reading %s/ring-ref and event-channel",
+				 be->frontpath);
+		return;
+	}
+
+	/* Map the shared frame, irq etc. */
+	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
+	if (err) {
+		xenbus_dev_error(be->dev, err,
+				 "mapping shared-frames %lu/%lu port %u",
+				 tx_ring_ref, rx_ring_ref, evtchn);
+		return;
+	}
+
+	xenbus_dev_ok(be->dev);
+}
+
+/*
+ Setup supplies physical device.
+ We provide event channel and device details to front end.
+ Frontend supplies shared frame and event channel.
+
+ Watch callback for our own backend node.  Reads "handle"; returns quietly
+ while it does not exist yet.  The first time a handle is available the
+ network interface is allocated (with an all-zero backend MAC) and
+ frontend_changed() is called directly to attempt the connection.
+ */
+static void backend_changed(struct xenbus_watch *watch, const char *node)
+{
+ int err;
+ long int handle;
+ struct backend_info *be
+ = container_of(watch, struct backend_info, backend_watch);
+ struct xenbus_device *dev = be->dev;
+ u8 be_mac[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 };
+
+ err = xenbus_scanf(dev->nodename, "handle", "%li", &handle);
+ if (XENBUS_EXIST_ERR(err))
+ return;
+ if (err < 0) {
+ xenbus_dev_error(dev, err, "reading handle");
+ return;
+ }
+
+ if (be->netif == NULL) {
+ be->netif = alloc_netif(be->frontend_id, handle, be_mac);
+ if (IS_ERR(be->netif)) {
+ err = PTR_ERR(be->netif);
+ be->netif = NULL;
+ xenbus_dev_error(dev, err, "creating interface");
+ return;
+ }
+
+ /* Disabled leftover that refers to the block backend's vbd_create(). */
+#if 0
+ err = vbd_create(be->netif, handle, be->pdev, be->readonly);
+ if (err) {
+ xenbus_dev_error(dev, err, "creating vbd structure");
+ return;
+ }
+#endif
+
+ /* Pass in NULL node to skip exist test. */
+ frontend_changed(&be->watch, NULL);
+ }
+}
+
+/*
+ * Driver probe hook: allocate the per-device state, read "frontend-id" and
+ * "frontend" from the backend node, register watches on the backend node
+ * and on the frontend path, then run backend_changed() once by hand.
+ *
+ * Returns 0 on success.  On failure everything set up so far is released
+ * through the free_be label and an error code is returned (-ENOMEM,
+ * -ENOENT, or whatever the failing xenbus call reported).
+ */
+static int netback_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ struct backend_info *be;
+ char *frontend;
+ int err;
+
+ be = kmalloc(sizeof(*be), GFP_KERNEL);
+ if (!be) {
+ xenbus_dev_error(dev, -ENOMEM, "allocating backend structure");
+ return -ENOMEM;
+ }
+ /* Zeroed so that unset pointers and watch nodes read as NULL below. */
+ memset(be, 0, sizeof(*be));
+
+ frontend = NULL;
+ err = xenbus_gather(dev->nodename,
+ "frontend-id", "%li", &be->frontend_id,
+ "frontend", NULL, &frontend,
+ NULL);
+ /* Missing entries are not reported as a device error, just bail out. */
+ if (XENBUS_EXIST_ERR(err))
+ goto free_be;
+ if (err < 0) {
+ xenbus_dev_error(dev, err,
+ "reading %s/frontend or frontend-id",
+ dev->nodename);
+ goto free_be;
+ }
+ if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+ /* If we can't get a frontend path and a frontend-id,
+ * then our bus-id is no longer valid and we need to
+ * destroy the backend device.
+ */
+ err = -ENOENT;
+ goto free_be;
+ }
+
+ be->dev = dev;
+ be->backend_watch.node = dev->nodename;
+ be->backend_watch.callback = backend_changed;
+ err = register_xenbus_watch(&be->backend_watch);
+ if (err) {
+ /* Cleared so free_be does not unregister a watch that never registered. */
+ be->backend_watch.node = NULL;
+ xenbus_dev_error(dev, err, "adding backend watch on %s",
+ dev->nodename);
+ goto free_be;
+ }
+
+ be->frontpath = frontend;
+ be->watch.node = be->frontpath;
+ be->watch.callback = frontend_changed;
+ err = register_xenbus_watch(&be->watch);
+ if (err) {
+ be->watch.node = NULL;
+ xenbus_dev_error(dev, err,
+ "adding frontend watch on %s",
+ be->frontpath);
+ goto free_be;
+ }
+
+ dev->data = be;
+
+ /* Kick the state machine once with whatever is already in the store. */
+ backend_changed(&be->backend_watch, dev->nodename);
+ return 0;
+
+ free_be:
+ /* be->frontpath, if set, aliases `frontend', so it is freed only once. */
+ if (be->backend_watch.node)
+ unregister_xenbus_watch(&be->backend_watch);
+ if (frontend)
+ kfree(frontend);
+ kfree(be);
+ return err;
+}
+
+/* Device types handled by this driver; the empty entry presumably ends the table. */
+static struct xenbus_device_id netback_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+/* xenbus driver description for the "vif" backend. */
+static struct xenbus_driver netback = {
+ .name = "vif",
+ .owner = THIS_MODULE,
+ .ids = netback_ids,
+ .probe = netback_probe,
+ .remove = netback_remove,
+};
+
+/* Register the netif backend with xenbus; the result is not checked. */
+void netif_xenbus_init(void)
+{
+ xenbus_register_backend(&netback);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/hw_irq.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hw_irq.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,71 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/*
+ * linux/include/asm/hw_irq.h
+ *
+ * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ * moved some of the old arch/i386/kernel/irq.h to here. VY
+ *
+ * IRQ/IPI changes taken from work by Thomas Radke
+ * <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ */
+
+#include <linux/config.h>
+#include <linux/profile.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+#include <asm/sections.h>
+
+/*
+ * Various low-level irq details needed by irq.c, process.c,
+ * time.c, io_apic.c and smp.c
+ *
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+extern u8 irq_vector[NR_IRQ_VECTORS];
+#define IO_APIC_VECTOR(irq) (irq_vector[irq])
+#define AUTO_ASSIGN -1
+
+extern void (*interrupt[NR_IRQS])(void);
+
+#ifdef CONFIG_SMP
+fastcall void reschedule_interrupt(void);
+fastcall void invalidate_interrupt(void);
+fastcall void call_function_interrupt(void);
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+fastcall void apic_timer_interrupt(void);
+fastcall void error_interrupt(void);
+fastcall void spurious_interrupt(void);
+fastcall void thermal_interrupt(struct pt_regs *);
+#define platform_legacy_irq(irq) ((irq) < 16)
+#endif
+
+void disable_8259A_irq(unsigned int irq);
+void enable_8259A_irq(unsigned int irq);
+int i8259A_irq_pending(unsigned int irq);
+void make_8259A_irq(unsigned int irq);
+void init_8259A(int aeoi);
+void FASTCALL(send_IPI_self(int vector));
+void init_VISWS_APIC_irqs(void);
+void setup_IO_APIC(void);
+void disable_IO_APIC(void);
+void print_IO_APIC(void);
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+void send_IPI(int dest, int vector);
+void setup_ioapic_dest(void);
+
+extern unsigned long io_apic_irqs;
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+
+extern void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i);
+
+#endif /* _ASM_HW_IRQ_H */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,32 @@
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+#include <linux/config.h>
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define D(n) __KM_FENCE_##n ,
+#else
+# define D(n)
+#endif
+
+/*
+ * Atomic kmap slot identifiers.  With CONFIG_DEBUG_HIGHMEM each D(n)
+ * inserts an extra __KM_FENCE_n enumerator in front of the slot; otherwise
+ * D(n) expands to nothing.  KM_TYPE_NR is the last enumerator and thus the
+ * number of entries before it.
+ */
+enum km_type {
+D(0) KM_BOUNCE_READ,
+D(1) KM_SKB_SUNRPC_DATA,
+D(2) KM_SKB_DATA_SOFTIRQ,
+D(3) KM_USER0,
+D(4) KM_USER1,
+D(5) KM_BIO_SRC_IRQ,
+D(6) KM_BIO_DST_IRQ,
+D(7) KM_PTE0,
+D(8) KM_PTE1,
+D(9) KM_IRQ0,
+D(10) KM_IRQ1,
+D(11) KM_SOFTIRQ0,
+D(12) KM_SOFTIRQ1,
+D(13) KM_SWIOTLB,
+D(14) KM_TYPE_NR
+};
+
+#undef D
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,22 @@
+#ifndef _I386_SCATTERLIST_H
+#define _I386_SCATTERLIST_H
+
+/*
+ * One scatter/gather segment.  dma_address and dma_length are the values
+ * returned by the sg_dma_address() and sg_dma_len() accessors.
+ */
+struct scatterlist {
+ struct page *page;
+ unsigned int offset;
+ unsigned int length;
+ dma_addr_t dma_address;
+ unsigned int dma_length;
+};
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg) ((sg)->dma_address)
+#define sg_dma_len(sg) ((sg)->dma_length)
+
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#endif /* !(_I386_SCATTERLIST_H) */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,42 @@
+#ifndef _ASM_SWIOTLB_H
+#define _ASM_SWIOTLB_H 1
+
+#include <linux/config.h>
+
+/* SWIOTLB interface */
+
+/* Map a single buffer of `size' bytes at `ptr' for DMA by `hwdev'. */
+extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr,
+				      size_t size, int dir);
+extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, int dir);
+extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
+ dma_addr_t dev_addr,
+ size_t size, int dir);
+extern void swiotlb_sync_single_for_device(struct device *hwdev,
+ dma_addr_t dev_addr,
+ size_t size, int dir);
+extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
+ struct scatterlist *sg, int nelems,
+ int dir);
+extern void swiotlb_sync_sg_for_device(struct device *hwdev,
+ struct scatterlist *sg, int nelems,
+ int dir);
+extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, int direction);
+extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, int direction);
+extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
+extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction);
+extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
+ size_t size, enum dma_data_direction direction);
+extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
+
+#ifdef CONFIG_SWIOTLB
+extern int swiotlb;
+#else
+#define swiotlb 0
+#endif
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hw_irq.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hw_irq.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,138 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/*
+ * linux/include/asm/hw_irq.h
+ *
+ * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ * moved some of the old arch/i386/kernel/irq.h to here. VY
+ *
+ * IRQ/IPI changes taken from work by Thomas Radke
+ * <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * hacked by Andi Kleen for x86-64.
+ *
+ * $Id: hw_irq.h,v 1.24 2001/09/14 20:55:03 vojtech Exp $
+ */
+
+#ifndef __ASSEMBLY__
+#include <linux/config.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+#include <linux/profile.h>
+#include <linux/smp.h>
+
+struct hw_interrupt_type;
+#endif
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x20:
+ */
+#define FIRST_EXTERNAL_VECTOR 0x20
+
+#define IA32_SYSCALL_VECTOR 0x80
+
+
+/*
+ * Vectors 0x20-0x2f are used for ISA interrupts.
+ */
+
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ * some of the following vectors are 'rare', they are merged
+ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ * TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ * Vectors 0xf0-0xf9 are free (reserved for future Linux use).
+ */
+#ifndef CONFIG_XEN
+#define SPURIOUS_APIC_VECTOR 0xff
+#define ERROR_APIC_VECTOR 0xfe
+#define INVALIDATE_TLB_VECTOR 0xfd
+#define RESCHEDULE_VECTOR 0xfc
+#define TASK_MIGRATION_VECTOR 0xfb
+#define CALL_FUNCTION_VECTOR 0xfa
+#define KDB_VECTOR 0xf9
+
+#define THERMAL_APIC_VECTOR 0xf0
+#endif
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+
+/*
+ * First APIC vector available to drivers: (vectors 0x30-0xee)
+ * we start at 0x31 to spread out vectors evenly between priority
+ * levels. (0x80 is the syscall vector)
+ */
+#define FIRST_DEVICE_VECTOR 0x31
+#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */
+
+
+#ifndef __ASSEMBLY__
+extern u8 irq_vector[NR_IRQ_VECTORS];
+#define IO_APIC_VECTOR(irq) (irq_vector[irq])
+#define AUTO_ASSIGN -1
+
+/*
+ * Various low-level irq details needed by irq.c, process.c,
+ * time.c, io_apic.c and smp.c
+ *
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+extern void disable_8259A_irq(unsigned int irq);
+extern void enable_8259A_irq(unsigned int irq);
+extern int i8259A_irq_pending(unsigned int irq);
+extern void make_8259A_irq(unsigned int irq);
+extern void init_8259A(int aeoi);
+extern void FASTCALL(send_IPI_self(int vector));
+extern void init_VISWS_APIC_irqs(void);
+extern void setup_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void print_IO_APIC(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern void send_IPI(int dest, int vector);
+extern void setup_ioapic_dest(void);
+
+extern unsigned long io_apic_irqs;
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+#include <asm/ptrace.h>
+
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+/*
+ * SMP has a few special interrupts for IPI messages
+ */
+
+#define BUILD_IRQ(nr) \
+asmlinkage void IRQ_NAME(nr); \
+__asm__( \
+"\n.p2align\n" \
+"IRQ" #nr "_interrupt:\n\t" \
+ "push $" #nr "-256 ; " \
+ "jmp common_interrupt");
+
+extern void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i);
+
+#define platform_legacy_irq(irq) ((irq) < 16)
+
+#endif
+
+#endif /* _ASM_HW_IRQ_H */
diff -r 5f1ed597f107 -r 8799d14bef77 patches/linux-2.6.12/patch-2.6.12.5
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/patches/linux-2.6.12/patch-2.6.12.5 Thu Aug 25 22:53:20 2005
@@ -0,0 +1,1614 @@
+diff --git a/Makefile b/Makefile
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 12
+-EXTRAVERSION =
++EXTRAVERSION = .5
+ NAME=Woozy Numbat
+
+ # *DOCUMENTATION*
+@@ -1149,7 +1149,7 @@ endif # KBUILD_EXTMOD
+ #(which is the most common case IMHO) to avoid unneeded clutter in the big tags file.
+ #Adding $(srctree) adds about 20M on i386 to the size of the output file!
+
+-ifeq ($(KBUILD_OUTPUT),)
++ifeq ($(src),$(obj))
+ __srctree =
+ else
+ __srctree = $(srctree)/
+diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
++++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+@@ -44,7 +44,7 @@
+
+ #define PFX "powernow-k8: "
+ #define BFX PFX "BIOS error: "
+-#define VERSION "version 1.40.2"
++#define VERSION "version 1.40.4"
+ #include "powernow-k8.h"
+
+ /* serialize freq changes */
+@@ -978,7 +978,7 @@ static int __init powernowk8_cpu_init(st
+ {
+ struct powernow_k8_data *data;
+ cpumask_t oldmask = CPU_MASK_ALL;
+- int rc;
++ int rc, i;
+
+ if (!check_supported_cpu(pol->cpu))
+ return -ENODEV;
+@@ -1064,7 +1064,9 @@ static int __init powernowk8_cpu_init(st
+ printk("cpu_init done, current fid 0x%x, vid 0x%x\n",
+ data->currfid, data->currvid);
+
+- powernow_data[pol->cpu] = data;
++ for_each_cpu_mask(i, cpu_core_map[pol->cpu]) {
++ powernow_data[i] = data;
++ }
+
+ return 0;
+
+diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
+--- a/arch/i386/kernel/process.c
++++ b/arch/i386/kernel/process.c
+@@ -827,6 +827,8 @@ asmlinkage int sys_get_thread_area(struc
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
++ memset(&info, 0, sizeof(info));
++
+ desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+ info.entry_number = idx;
+diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
+--- a/arch/ia64/kernel/ptrace.c
++++ b/arch/ia64/kernel/ptrace.c
+@@ -945,6 +945,13 @@ access_uarea (struct task_struct *child,
+ *data = (pt->cr_ipsr & IPSR_MASK);
+ return 0;
+
++ case PT_AR_RSC:
++ if (write_access)
++ pt->ar_rsc = *data | (3 << 2); /* force PL3 */
++ else
++ *data = pt->ar_rsc;
++ return 0;
++
+ case PT_AR_RNAT:
+ urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
+ rnat_addr = (long) ia64_rse_rnat_addr((long *)
+@@ -996,9 +1003,6 @@ access_uarea (struct task_struct *child,
+ case PT_AR_BSPSTORE:
+ ptr = pt_reg_addr(pt, ar_bspstore);
+ break;
+- case PT_AR_RSC:
+- ptr = pt_reg_addr(pt, ar_rsc);
+- break;
+ case PT_AR_UNAT:
+ ptr = pt_reg_addr(pt, ar_unat);
+ break;
+@@ -1234,7 +1238,7 @@ ptrace_getregs (struct task_struct *chil
+ static long
+ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
+ {
+- unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
++ unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
+ struct unw_frame_info info;
+ struct switch_stack *sw;
+ struct ia64_fpreg fpval;
+@@ -1267,7 +1271,7 @@ ptrace_setregs (struct task_struct *chil
+ /* app regs */
+
+ retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
+- retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]);
++ retval |= __get_user(rsc, &ppr->ar[PT_AUR_RSC]);
+ retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
+ retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
+ retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
+@@ -1365,6 +1369,7 @@ ptrace_setregs (struct task_struct *chil
+ retval |= __get_user(nat_bits, &ppr->nat);
+
+ retval |= access_uarea(child, PT_CR_IPSR, &psr, 1);
++ retval |= access_uarea(child, PT_AR_RSC, &rsc, 1);
+ retval |= access_uarea(child, PT_AR_EC, &ec, 1);
+ retval |= access_uarea(child, PT_AR_LC, &lc, 1);
+ retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1);
+diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
+--- a/arch/ia64/kernel/signal.c
++++ b/arch/ia64/kernel/signal.c
+@@ -94,7 +94,7 @@ sys_sigaltstack (const stack_t __user *u
+ static long
+ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
+ {
+- unsigned long ip, flags, nat, um, cfm;
++ unsigned long ip, flags, nat, um, cfm, rsc;
+ long err;
+
+ /* Always make any pending restarted system calls return -EINTR */
+@@ -106,7 +106,7 @@ restore_sigcontext (struct sigcontext __
+ err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */
+ err |= __get_user(cfm, &sc->sc_cfm);
+ err |= __get_user(um, &sc->sc_um); /* user mask */
+- err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
++ err |= __get_user(rsc, &sc->sc_ar_rsc);
+ err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
+ err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
+ err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
+@@ -119,6 +119,7 @@ restore_sigcontext (struct sigcontext __
+ err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */
+
+ scr->pt.cr_ifs = cfm | (1UL << 63);
++ scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */
+
+ /* establish new instruction pointer: */
+ scr->pt.cr_iip = ip & ~0x3UL;
+diff --git a/arch/ppc/kernel/time.c b/arch/ppc/kernel/time.c
+--- a/arch/ppc/kernel/time.c
++++ b/arch/ppc/kernel/time.c
+@@ -89,6 +89,9 @@ unsigned long tb_to_ns_scale;
+
+ extern unsigned long wall_jiffies;
+
++/* used for timezone offset */
++static long timezone_offset;
++
+ DEFINE_SPINLOCK(rtc_lock);
+
+ EXPORT_SYMBOL(rtc_lock);
+@@ -170,7 +173,7 @@ void timer_interrupt(struct pt_regs * re
+ xtime.tv_sec - last_rtc_update >= 659 &&
+ abs((xtime.tv_nsec / 1000) - (1000000-1000000/HZ)) < 500000/HZ &&
+ jiffies - wall_jiffies == 1) {
+- if (ppc_md.set_rtc_time(xtime.tv_sec+1 + time_offset) == 0)
++ if (ppc_md.set_rtc_time(xtime.tv_sec+1 + timezone_offset) == 0)
+ last_rtc_update = xtime.tv_sec+1;
+ else
+ /* Try again one minute later */
+@@ -286,7 +289,7 @@ void __init time_init(void)
+ unsigned old_stamp, stamp, elapsed;
+
+ if (ppc_md.time_init != NULL)
+- time_offset = ppc_md.time_init();
++ timezone_offset = ppc_md.time_init();
+
+ if (__USE_RTC()) {
+ /* 601 processor: dec counts down by 128 every 128ns */
+@@ -331,10 +334,10 @@ void __init time_init(void)
+ set_dec(tb_ticks_per_jiffy);
+
+ /* If platform provided a timezone (pmac), we correct the time */
+- if (time_offset) {
+- sys_tz.tz_minuteswest = -time_offset / 60;
++ if (timezone_offset) {
++ sys_tz.tz_minuteswest = -timezone_offset / 60;
+ sys_tz.tz_dsttime = 0;
+- xtime.tv_sec -= time_offset;
++ xtime.tv_sec -= timezone_offset;
+ }
+ set_normalized_timespec(&wall_to_monotonic,
+ -xtime.tv_sec, -xtime.tv_nsec);
+diff --git a/arch/ppc64/boot/zlib.c b/arch/ppc64/boot/zlib.c
+--- a/arch/ppc64/boot/zlib.c
++++ b/arch/ppc64/boot/zlib.c
+@@ -1307,7 +1307,7 @@ local int huft_build(
+ {
+ *t = (inflate_huft *)Z_NULL;
+ *m = 0;
+- return Z_OK;
++ return Z_DATA_ERROR;
+ }
+
+
+@@ -1351,6 +1351,7 @@ local int huft_build(
+ if ((j = *p++) != 0)
+ v[x[j]++] = i;
+ } while (++i < n);
++ n = x[g]; /* set n to length of v */
+
+
+ /* Generate the Huffman codes and for each, make the table entries */
+diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
+--- a/arch/um/kernel/process.c
++++ b/arch/um/kernel/process.c
+@@ -130,7 +130,7 @@ int start_fork_tramp(void *thread_arg, u
+ return(arg.pid);
+ }
+
+-static int ptrace_child(void)
++static int ptrace_child(void *arg)
+ {
+ int ret;
+ int pid = os_getpid(), ppid = getppid();
+@@ -159,16 +159,20 @@ static int ptrace_child(void)
+ _exit(ret);
+ }
+
+-static int start_ptraced_child(void)
++static int start_ptraced_child(void **stack_out)
+ {
++ void *stack;
++ unsigned long sp;
+ int pid, n, status;
+
+- pid = fork();
+- if(pid == 0)
+- ptrace_child();
+-
++ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
++ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
++ if(stack == MAP_FAILED)
++ panic("check_ptrace : mmap failed, errno = %d", errno);
++ sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *);
++ pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL);
+ if(pid < 0)
+- panic("check_ptrace : fork failed, errno = %d", errno);
++ panic("check_ptrace : clone failed, errno = %d", errno);
+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
+ if(n < 0)
+ panic("check_ptrace : wait failed, errno = %d", errno);
+@@ -176,6 +180,7 @@ static int start_ptraced_child(void)
+ panic("check_ptrace : expected SIGSTOP, got status = %d",
+ status);
+
++ *stack_out = stack;
+ return(pid);
+ }
+
+@@ -183,12 +188,12 @@ static int start_ptraced_child(void)
+ * just avoid using sysemu, not panic, but only if SYSEMU features are broken.
+ * So only for SYSEMU features we test mustpanic, while normal host features
+ * must work anyway!*/
+-static int stop_ptraced_child(int pid, int exitcode, int mustexit)
++static int stop_ptraced_child(int pid, void *stack, int exitcode, int mustpanic)
+ {
+ int status, n, ret = 0;
+
+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0)
+- panic("stop_ptraced_child : ptrace failed, errno = %d", errno);
++ panic("check_ptrace : ptrace failed, errno = %d", errno);
+ CATCH_EINTR(n = waitpid(pid, &status, 0));
+ if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
+ int exit_with = WEXITSTATUS(status);
+@@ -199,13 +204,15 @@ static int stop_ptraced_child(int pid, i
+ printk("check_ptrace : child exited with exitcode %d, while "
+ "expecting %d; status 0x%x", exit_with,
+ exitcode, status);
+- if (mustexit)
++ if (mustpanic)
+ panic("\n");
+ else
+ printk("\n");
+ ret = -1;
+ }
+
++ if(munmap(stack, PAGE_SIZE) < 0)
++ panic("check_ptrace : munmap failed, errno = %d", errno);
+ return ret;
+ }
+
+@@ -227,11 +234,12 @@ __uml_setup("nosysemu", nosysemu_cmd_par
+
+ static void __init check_sysemu(void)
+ {
++ void *stack;
+ int pid, syscall, n, status, count=0;
+
+ printk("Checking syscall emulation patch for ptrace...");
+ sysemu_supported = 0;
+- pid = start_ptraced_child();
++ pid = start_ptraced_child(&stack);
+
+ if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
+ goto fail;
+@@ -249,7 +257,7 @@ static void __init check_sysemu(void)
+ panic("check_sysemu : failed to modify system "
+ "call return, errno = %d", errno);
+
+- if (stop_ptraced_child(pid, 0, 0) < 0)
++ if (stop_ptraced_child(pid, stack, 0, 0) < 0)
+ goto fail_stopped;
+
+ sysemu_supported = 1;
+@@ -257,7 +265,7 @@ static void __init check_sysemu(void)
+ set_using_sysemu(!force_sysemu_disabled);
+
+ printk("Checking advanced syscall emulation patch for ptrace...");
+- pid = start_ptraced_child();
++ pid = start_ptraced_child(&stack);
+ while(1){
+ count++;
+ if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0)
+@@ -282,7 +290,7 @@ static void __init check_sysemu(void)
+ break;
+ }
+ }
+- if (stop_ptraced_child(pid, 0, 0) < 0)
++ if (stop_ptraced_child(pid, stack, 0, 0) < 0)
+ goto fail_stopped;
+
+ sysemu_supported = 2;
+@@ -293,17 +301,18 @@ static void __init check_sysemu(void)
+ return;
+
+ fail:
+- stop_ptraced_child(pid, 1, 0);
++ stop_ptraced_child(pid, stack, 1, 0);
+ fail_stopped:
+ printk("missing\n");
+ }
+
+ void __init check_ptrace(void)
+ {
++ void *stack;
+ int pid, syscall, n, status;
+
+ printk("Checking that ptrace can change system call numbers...");
+- pid = start_ptraced_child();
++ pid = start_ptraced_child(&stack);
+
+ if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0)
+ panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", errno);
+@@ -330,7 +339,7 @@ void __init check_ptrace(void)
+ break;
+ }
+ }
+- stop_ptraced_child(pid, 0, 1);
++ stop_ptraced_child(pid, stack, 0, 1);
+ printk("OK\n");
+ check_sysemu();
+ }
+@@ -362,10 +371,11 @@ void forward_pending_sigio(int target)
+ static inline int check_skas3_ptrace_support(void)
+ {
+ struct ptrace_faultinfo fi;
++ void *stack;
+ int pid, n, ret = 1;
+
+ printf("Checking for the skas3 patch in the host...");
+- pid = start_ptraced_child();
++ pid = start_ptraced_child(&stack);
+
+ n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi);
+ if (n < 0) {
+@@ -380,7 +390,7 @@ static inline int check_skas3_ptrace_sup
+ }
+
+ init_registers(pid);
+- stop_ptraced_child(pid, 1, 1);
++ stop_ptraced_child(pid, stack, 1, 1);
+
+ return(ret);
+ }
+diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
+--- a/arch/x86_64/ia32/syscall32.c
++++ b/arch/x86_64/ia32/syscall32.c
+@@ -57,6 +57,7 @@ int syscall32_setup_pages(struct linux_b
+ int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
++ int ret;
+
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!vma)
+@@ -78,7 +79,11 @@ int syscall32_setup_pages(struct linux_b
+ vma->vm_mm = mm;
+
+ down_write(&mm->mmap_sem);
+- insert_vm_struct(mm, vma);
++ if ((ret = insert_vm_struct(mm, vma))) {
++ up_write(&mm->mmap_sem);
++ kmem_cache_free(vm_area_cachep, vma);
++ return ret;
++ }
+ mm->total_vm += npages;
+ up_write(&mm->mmap_sem);
+ return 0;
+diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
+--- a/arch/x86_64/kernel/setup.c
++++ b/arch/x86_64/kernel/setup.c
+@@ -729,8 +729,6 @@ static void __init amd_detect_cmp(struct
+ int cpu = smp_processor_id();
+ int node = 0;
+ unsigned bits;
+- if (c->x86_num_cores == 1)
+- return;
+
+ bits = 0;
+ while ((1 << bits) < c->x86_num_cores)
+diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
+--- a/arch/x86_64/kernel/smp.c
++++ b/arch/x86_64/kernel/smp.c
+@@ -284,6 +284,71 @@ struct call_data_struct {
+ static struct call_data_struct * call_data;
+
+ /*
++ * this function sends a 'generic call function' IPI to one other CPU
++ * in the system.
++ */
++static void __smp_call_function_single (int cpu, void (*func) (void *info), void *info,
++ int nonatomic, int wait)
++{
++ struct call_data_struct data;
++ int cpus = 1;
++
++ data.func = func;
++ data.info = info;
++ atomic_set(&data.started, 0);
++ data.wait = wait;
++ if (wait)
++ atomic_set(&data.finished, 0);
++
++ call_data = &data;
++ wmb();
++ /* Send a message to all other CPUs and wait for them to respond */
++ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
++
++ /* Wait for response */
++ while (atomic_read(&data.started) != cpus)
++ cpu_relax();
++
++ if (!wait)
++ return;
++
++ while (atomic_read(&data.finished) != cpus)
++ cpu_relax();
++}
++
++/*
++ * Run a function on another CPU
++ * <func> The function to run. This must be fast and non-blocking.
++ * <info> An arbitrary pointer to pass to the function.
++ * <nonatomic> Currently unused.
++ * <wait> If true, wait until function has completed on other CPUs.
++ * [RETURNS] 0 on success, else a negative status code.
++ *
++ * Does not return until the remote CPU is nearly ready to execute <func>
++ * or is or has executed.
++ */
++
++int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
++ int nonatomic, int wait)
++{
++
++ int me = get_cpu(); /* prevent preemption and reschedule on another processor */
++
++ if (cpu == me) {
++ printk("%s: trying to call self\n", __func__);
++ put_cpu();
++ return -EBUSY;
++ }
++ spin_lock_bh(&call_lock);
++
++ __smp_call_function_single(cpu, func,info,nonatomic,wait);
++
++ spin_unlock_bh(&call_lock);
++ put_cpu();
++ return 0;
++}
++
++/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
+--- a/arch/x86_64/kernel/smpboot.c
++++ b/arch/x86_64/kernel/smpboot.c
+@@ -202,9 +202,6 @@ static __cpuinit void sync_master(void *
+ {
+ unsigned long flags, i;
+
+- if (smp_processor_id() != boot_cpu_id)
+- return;
+-
+ go[MASTER] = 0;
+
+ local_irq_save(flags);
+@@ -253,7 +250,7 @@ get_delta(long *rt, long *master)
+ return tcenter - best_tm;
+ }
+
+-static __cpuinit void sync_tsc(void)
++static __cpuinit void sync_tsc(unsigned int master)
+ {
+ int i, done = 0;
+ long delta, adj, adjust_latency = 0;
+@@ -267,9 +264,17 @@ static __cpuinit void sync_tsc(void)
+ } t[NUM_ROUNDS] __cpuinitdata;
+ #endif
+
++ printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n",
++ smp_processor_id(), master);
++
+ go[MASTER] = 1;
+
+- smp_call_function(sync_master, NULL, 1, 0);
++ /* It is dangerous to broadcast IPI as cpus are coming up,
++ * as they may not be ready to accept them. So since
++ * we only need to send the ipi to the boot cpu direct
++ * the message, and avoid the race.
++ */
++ smp_call_function_single(master, sync_master, NULL, 1, 0);
+
+ while (go[MASTER]) /* wait for master to be ready */
+ no_cpu_relax();
+@@ -313,16 +318,14 @@ static __cpuinit void sync_tsc(void)
+ printk(KERN_INFO
+ "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
+ "maxerr %lu cycles)\n",
+- smp_processor_id(), boot_cpu_id, delta, rt);
++ smp_processor_id(), master, delta, rt);
+ }
+
+ static void __cpuinit tsc_sync_wait(void)
+ {
+ if (notscsync || !cpu_has_tsc)
+ return;
+- printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
+- boot_cpu_id);
+- sync_tsc();
++ sync_tsc(0);
+ }
+
+ static __init int notscsync_setup(char *s)
+diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
+--- a/drivers/acpi/pci_irq.c
++++ b/drivers/acpi/pci_irq.c
+@@ -433,8 +433,9 @@ acpi_pci_irq_enable (
+ printk(KERN_WARNING PREFIX "PCI Interrupt %s[%c]: no GSI",
+ pci_name(dev), ('A' + pin));
+ /* Interrupt Line values above 0xF are forbidden */
+- if (dev->irq >= 0 && (dev->irq <= 0xF)) {
++ if (dev->irq > 0 && (dev->irq <= 0xF)) {
+ printk(" - using IRQ %d\n", dev->irq);
++ acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+ return_VALUE(0);
+ }
+ else {
+diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
+--- a/drivers/char/rocket.c
++++ b/drivers/char/rocket.c
+@@ -277,7 +277,7 @@ static void rp_do_receive(struct r_port
+ ToRecv = space;
+
+ if (ToRecv <= 0)
+- return;
++ goto done;
+
+ /*
+ * if status indicates there are errored characters in the
+@@ -359,6 +359,7 @@ static void rp_do_receive(struct r_port
+ }
+ /* Push the data up to the tty layer */
+ ld->receive_buf(tty, tty->flip.char_buf, tty->flip.flag_buf, count);
++done:
+ tty_ldisc_deref(ld);
+ }
+
+diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
+--- a/drivers/char/tpm/tpm.c
++++ b/drivers/char/tpm/tpm.c
+@@ -32,12 +32,6 @@
+
+ #define TPM_BUFSIZE 2048
+
+-/* PCI configuration addresses */
+-#define PCI_GEN_PMCON_1 0xA0
+-#define PCI_GEN1_DEC 0xE4
+-#define PCI_LPC_EN 0xE6
+-#define PCI_GEN2_DEC 0xEC
+-
+ static LIST_HEAD(tpm_chip_list);
+ static DEFINE_SPINLOCK(driver_lock);
+ static int dev_mask[32];
+@@ -61,72 +55,6 @@ void tpm_time_expired(unsigned long ptr)
+ EXPORT_SYMBOL_GPL(tpm_time_expired);
+
+ /*
+- * Initialize the LPC bus and enable the TPM ports
+- */
+-int tpm_lpc_bus_init(struct pci_dev *pci_dev, u16 base)
+-{
+- u32 lpcenable, tmp;
+- int is_lpcm = 0;
+-
+- switch (pci_dev->vendor) {
+- case PCI_VENDOR_ID_INTEL:
+- switch (pci_dev->device) {
+- case PCI_DEVICE_ID_INTEL_82801CA_12:
+- case PCI_DEVICE_ID_INTEL_82801DB_12:
+- is_lpcm = 1;
+- break;
+- }
+- /* init ICH (enable LPC) */
+- pci_read_config_dword(pci_dev, PCI_GEN1_DEC, &lpcenable);
+- lpcenable |= 0x20000000;
+- pci_write_config_dword(pci_dev, PCI_GEN1_DEC, lpcenable);
+-
+- if (is_lpcm) {
+- pci_read_config_dword(pci_dev, PCI_GEN1_DEC,
+- &lpcenable);
+- if ((lpcenable & 0x20000000) == 0) {
+- dev_err(&pci_dev->dev,
+- "cannot enable LPC\n");
+- return -ENODEV;
+- }
+- }
+-
+- /* initialize TPM registers */
+- pci_read_config_dword(pci_dev, PCI_GEN2_DEC, &tmp);
+-
+- if (!is_lpcm)
+- tmp = (tmp & 0xFFFF0000) | (base & 0xFFF0);
+- else
+- tmp =
+- (tmp & 0xFFFF0000) | (base & 0xFFF0) |
+- 0x00000001;
+-
+- pci_write_config_dword(pci_dev, PCI_GEN2_DEC, tmp);
+-
+- if (is_lpcm) {
+- pci_read_config_dword(pci_dev, PCI_GEN_PMCON_1,
+- &tmp);
+- tmp |= 0x00000004; /* enable CLKRUN */
+- pci_write_config_dword(pci_dev, PCI_GEN_PMCON_1,
+- tmp);
+- }
+- tpm_write_index(0x0D, 0x55); /* unlock 4F */
+- tpm_write_index(0x0A, 0x00); /* int disable */
+- tpm_write_index(0x08, base); /* base addr lo */
+- tpm_write_index(0x09, (base & 0xFF00) >> 8); /* base addr hi */
+- tpm_write_index(0x0D, 0xAA); /* lock 4F */
+- break;
+- case PCI_VENDOR_ID_AMD:
+- /* nothing yet */
+- break;
+- }
+-
+- return 0;
+-}
+-
+-EXPORT_SYMBOL_GPL(tpm_lpc_bus_init);
+-
+-/*
+ * Internal kernel interface to transmit TPM commands
+ */
+ static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
+@@ -590,10 +518,6 @@ int tpm_pm_resume(struct pci_dev *pci_de
+ if (chip == NULL)
+ return -ENODEV;
+
+- spin_lock(&driver_lock);
+- tpm_lpc_bus_init(pci_dev, chip->vendor->base);
+- spin_unlock(&driver_lock);
+-
+ return 0;
+ }
+
+diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
+--- a/drivers/char/tpm/tpm.h
++++ b/drivers/char/tpm/tpm.h
+@@ -79,8 +79,6 @@ static inline void tpm_write_index(int i
+ }
+
+ extern void tpm_time_expired(unsigned long);
+-extern int tpm_lpc_bus_init(struct pci_dev *, u16);
+-
+ extern int tpm_register_hardware(struct pci_dev *,
+ struct tpm_vendor_specific *);
+ extern int tpm_open(struct inode *, struct file *);
+diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
+--- a/drivers/char/tpm/tpm_atmel.c
++++ b/drivers/char/tpm/tpm_atmel.c
+@@ -22,7 +22,10 @@
+ #include "tpm.h"
+
+ /* Atmel definitions */
+-#define TPM_ATML_BASE 0x400
++enum tpm_atmel_addr {
++ TPM_ATMEL_BASE_ADDR_LO = 0x08,
++ TPM_ATMEL_BASE_ADDR_HI = 0x09
++};
+
+ /* write status bits */
+ #define ATML_STATUS_ABORT 0x01
+@@ -127,7 +130,6 @@ static struct tpm_vendor_specific tpm_at
+ .cancel = tpm_atml_cancel,
+ .req_complete_mask = ATML_STATUS_BUSY | ATML_STATUS_DATA_AVAIL,
+ .req_complete_val = ATML_STATUS_DATA_AVAIL,
+- .base = TPM_ATML_BASE,
+ .miscdev = { .fops = &atmel_ops, },
+ };
+
+@@ -136,14 +138,16 @@ static int __devinit tpm_atml_init(struc
+ {
+ u8 version[4];
+ int rc = 0;
++ int lo, hi;
+
+ if (pci_enable_device(pci_dev))
+ return -EIO;
+
+- if (tpm_lpc_bus_init(pci_dev, TPM_ATML_BASE)) {
+- rc = -ENODEV;
+- goto out_err;
+- }
++ lo = tpm_read_index( TPM_ATMEL_BASE_ADDR_LO );
++ hi = tpm_read_index( TPM_ATMEL_BASE_ADDR_HI );
++
++ tpm_atmel.base = (hi<<8)|lo;
++ dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base);
+
+ /* verify that it is an Atmel part */
+ if (tpm_read_index(4) != 'A' || tpm_read_index(5) != 'T'
+diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
+--- a/drivers/char/tpm/tpm_nsc.c
++++ b/drivers/char/tpm/tpm_nsc.c
+@@ -24,6 +24,10 @@
+ /* National definitions */
+ #define TPM_NSC_BASE 0x360
+ #define TPM_NSC_IRQ 0x07
++#define TPM_NSC_BASE0_HI 0x60
++#define TPM_NSC_BASE0_LO 0x61
++#define TPM_NSC_BASE1_HI 0x62
++#define TPM_NSC_BASE1_LO 0x63
+
+ #define NSC_LDN_INDEX 0x07
+ #define NSC_SID_INDEX 0x20
+@@ -234,7 +238,6 @@ static struct tpm_vendor_specific tpm_ns
+ .cancel = tpm_nsc_cancel,
+ .req_complete_mask = NSC_STATUS_OBF,
+ .req_complete_val = NSC_STATUS_OBF,
+- .base = TPM_NSC_BASE,
+ .miscdev = { .fops = &nsc_ops, },
+
+ };
+@@ -243,15 +246,16 @@ static int __devinit tpm_nsc_init(struct
+ const struct pci_device_id *pci_id)
+ {
+ int rc = 0;
++ int lo, hi;
++
++ hi = tpm_read_index(TPM_NSC_BASE0_HI);
++ lo = tpm_read_index(TPM_NSC_BASE0_LO);
++
++ tpm_nsc.base = (hi<<8) | lo;
+
+ if (pci_enable_device(pci_dev))
+ return -EIO;
+
+- if (tpm_lpc_bus_init(pci_dev, TPM_NSC_BASE)) {
+- rc = -ENODEV;
+- goto out_err;
+- }
+-
+ /* verify that it is a National part (SID) */
+ if (tpm_read_index(NSC_SID_INDEX) != 0xEF) {
+ rc = -ENODEV;
+diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
+--- a/drivers/char/tty_ioctl.c
++++ b/drivers/char/tty_ioctl.c
+@@ -476,11 +476,11 @@ int n_tty_ioctl(struct tty_struct * tty,
+ ld = tty_ldisc_ref(tty);
+ switch (arg) {
+ case TCIFLUSH:
+- if (ld->flush_buffer)
++ if (ld && ld->flush_buffer)
+ ld->flush_buffer(tty);
+ break;
+ case TCIOFLUSH:
+- if (ld->flush_buffer)
++ if (ld && ld->flush_buffer)
+ ld->flush_buffer(tty);
+ /* fall through */
+ case TCOFLUSH:
+diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
+--- a/drivers/media/video/cx88/cx88-video.c
++++ b/drivers/media/video/cx88/cx88-video.c
+@@ -261,7 +261,7 @@ static struct cx88_ctrl cx8800_ctls[] =
+ .default_value = 0,
+ .type = V4L2_CTRL_TYPE_INTEGER,
+ },
+- .off = 0,
++ .off = 128,
+ .reg = MO_HUE,
+ .mask = 0x00ff,
+ .shift = 0,
+diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
+--- a/drivers/net/e1000/e1000_main.c
++++ b/drivers/net/e1000/e1000_main.c
+@@ -2307,6 +2307,7 @@ e1000_xmit_frame(struct sk_buff *skb, st
+ tso = e1000_tso(adapter, skb);
+ if (tso < 0) {
+ dev_kfree_skb_any(skb);
++ spin_unlock_irqrestore(&adapter->tx_lock, flags);
+ return NETDEV_TX_OK;
+ }
+
+diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig
+--- a/drivers/net/hamradio/Kconfig
++++ b/drivers/net/hamradio/Kconfig
+@@ -17,7 +17,7 @@ config MKISS
+
+ config 6PACK
+ tristate "Serial port 6PACK driver"
+- depends on AX25 && BROKEN_ON_SMP
++ depends on AX25
+ ---help---
+ 6pack is a transmission protocol for the data exchange between your
+ PC and your TNC (the Terminal Node Controller acts as a kind of
+diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c
+--- a/drivers/net/shaper.c
++++ b/drivers/net/shaper.c
+@@ -135,10 +135,8 @@ static int shaper_start_xmit(struct sk_b
+ {
+ struct shaper *shaper = dev->priv;
+ struct sk_buff *ptr;
+-
+- if (down_trylock(&shaper->sem))
+- return -1;
+
++ spin_lock(&shaper->lock);
+ ptr=shaper->sendq.prev;
+
+ /*
+@@ -232,7 +230,7 @@ static int shaper_start_xmit(struct sk_b
+ shaper->stats.collisions++;
+ }
+ shaper_kick(shaper);
+- up(&shaper->sem);
++ spin_unlock(&shaper->lock);
+ return 0;
+ }
+
+@@ -271,11 +269,9 @@ static void shaper_timer(unsigned long d
+ {
+ struct shaper *shaper = (struct shaper *)data;
+
+- if (!down_trylock(&shaper->sem)) {
+- shaper_kick(shaper);
+- up(&shaper->sem);
+- } else
+- mod_timer(&shaper->timer, jiffies);
++ spin_lock(&shaper->lock);
++ shaper_kick(shaper);
++ spin_unlock(&shaper->lock);
+ }
+
+ /*
+@@ -332,21 +328,6 @@ static void shaper_kick(struct shaper *s
+
+
+ /*
+- * Flush the shaper queues on a closedown
+- */
+-
+-static void shaper_flush(struct shaper *shaper)
+-{
+- struct sk_buff *skb;
+-
+- down(&shaper->sem);
+- while((skb=skb_dequeue(&shaper->sendq))!=NULL)
+- dev_kfree_skb(skb);
+- shaper_kick(shaper);
+- up(&shaper->sem);
+-}
+-
+-/*
+ * Bring the interface up. We just disallow this until a
+ * bind.
+ */
+@@ -375,7 +356,15 @@ static int shaper_open(struct net_device
+ static int shaper_close(struct net_device *dev)
+ {
+ struct shaper *shaper=dev->priv;
+- shaper_flush(shaper);
++ struct sk_buff *skb;
++
++ while ((skb = skb_dequeue(&shaper->sendq)) != NULL)
++ dev_kfree_skb(skb);
++
++ spin_lock_bh(&shaper->lock);
++ shaper_kick(shaper);
++ spin_unlock_bh(&shaper->lock);
++
+ del_timer_sync(&shaper->timer);
+ return 0;
+ }
+@@ -576,6 +565,7 @@ static void shaper_init_priv(struct net_
+ init_timer(&sh->timer);
+ sh->timer.function=shaper_timer;
+ sh->timer.data=(unsigned long)sh;
++ spin_lock_init(&sh->lock);
+ }
+
+ /*
+diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
+--- a/drivers/pci/pci-driver.c
++++ b/drivers/pci/pci-driver.c
+@@ -396,7 +396,7 @@ int pci_register_driver(struct pci_drive
+ /* FIXME, once all of the existing PCI drivers have been fixed to set
+ * the pci shutdown function, this test can go away. */
+ if (!drv->driver.shutdown)
+- drv->driver.shutdown = pci_device_shutdown,
++ drv->driver.shutdown = pci_device_shutdown;
+ drv->driver.owner = drv->owner;
+ drv->driver.kobj.ktype = &pci_driver_kobj_type;
+ pci_init_dynids(&drv->dynids);
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -1914,9 +1914,11 @@ qla2x00_reg_remote_port(scsi_qla_host_t
+ rport_ids.roles |= FC_RPORT_ROLE_FCP_TARGET;
+
+ fcport->rport = rport = fc_remote_port_add(ha->host, 0, &rport_ids);
+- if (!rport)
++ if (!rport) {
+ qla_printk(KERN_WARNING, ha,
+ "Unable to allocate fc remote port!\n");
++ return;
++ }
+
+ if (rport->scsi_target_id != -1 && rport->scsi_target_id < MAX_TARGETS)
+ fcport->os_target_id = rport->scsi_target_id;
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -1150,7 +1150,7 @@ iospace_error_exit:
+ */
+ int qla2x00_probe_one(struct pci_dev *pdev, struct qla_board_info *brd_info)
+ {
+- int ret;
++ int ret = -ENODEV;
+ device_reg_t __iomem *reg;
+ struct Scsi_Host *host;
+ scsi_qla_host_t *ha;
+@@ -1161,7 +1161,7 @@ int qla2x00_probe_one(struct pci_dev *pd
+ fc_port_t *fcport;
+
+ if (pci_enable_device(pdev))
+- return -1;
++ goto probe_out;
+
+ host = scsi_host_alloc(&qla2x00_driver_template,
+ sizeof(scsi_qla_host_t));
+@@ -1183,9 +1183,8 @@ int qla2x00_probe_one(struct pci_dev *pd
+
+ /* Configure PCI I/O space */
+ ret = qla2x00_iospace_config(ha);
+- if (ret != 0) {
+- goto probe_alloc_failed;
+- }
++ if (ret)
++ goto probe_failed;
+
+ /* Sanitize the information from PCI BIOS. */
+ host->irq = pdev->irq;
+@@ -1258,23 +1257,10 @@ int qla2x00_probe_one(struct pci_dev *pd
+ qla_printk(KERN_WARNING, ha,
+ "[ERROR] Failed to allocate memory for adapter\n");
+
+- goto probe_alloc_failed;
++ ret = -ENOMEM;
++ goto probe_failed;
+ }
+
+- pci_set_drvdata(pdev, ha);
+- host->this_id = 255;
+- host->cmd_per_lun = 3;
+- host->unique_id = ha->instance;
+- host->max_cmd_len = MAX_CMDSZ;
+- host->max_channel = ha->ports - 1;
+- host->max_id = ha->max_targets;
+- host->max_lun = ha->max_luns;
+- host->transportt = qla2xxx_transport_template;
+- if (scsi_add_host(host, &pdev->dev))
+- goto probe_alloc_failed;
+-
+- qla2x00_alloc_sysfs_attr(ha);
+-
+ if (qla2x00_initialize_adapter(ha) &&
+ !(ha->device_flags & DFLG_NO_CABLE)) {
+
+@@ -1285,11 +1271,10 @@ int qla2x00_probe_one(struct pci_dev *pd
+ "Adapter flags %x.\n",
+ ha->host_no, ha->device_flags));
+
++ ret = -ENODEV;
+ goto probe_failed;
+ }
+
+- qla2x00_init_host_attr(ha);
+-
+ /*
+ * Startup the kernel thread for this host adapter
+ */
+@@ -1299,17 +1284,26 @@ int qla2x00_probe_one(struct pci_dev *pd
+ qla_printk(KERN_WARNING, ha,
+ "Unable to start DPC thread!\n");
+
++ ret = -ENODEV;
+ goto probe_failed;
+ }
+ wait_for_completion(&ha->dpc_inited);
+
++ host->this_id = 255;
++ host->cmd_per_lun = 3;
++ host->unique_id = ha->instance;
++ host->max_cmd_len = MAX_CMDSZ;
++ host->max_channel = ha->ports - 1;
++ host->max_lun = MAX_LUNS;
++ host->transportt = qla2xxx_transport_template;
++
+ if (IS_QLA2100(ha) || IS_QLA2200(ha))
+ ret = request_irq(host->irq, qla2100_intr_handler,
+ SA_INTERRUPT|SA_SHIRQ, ha->brd_info->drv_name, ha);
+ else
+ ret = request_irq(host->irq, qla2300_intr_handler,
+ SA_INTERRUPT|SA_SHIRQ, ha->brd_info->drv_name, ha);
+- if (ret != 0) {
++ if (ret) {
+ qla_printk(KERN_WARNING, ha,
+ "Failed to reserve interrupt %d already in use.\n",
+ host->irq);
+@@ -1363,9 +1357,18 @@ int qla2x00_probe_one(struct pci_dev *pd
+ msleep(10);
+ }
+
++ pci_set_drvdata(pdev, ha);
+ ha->flags.init_done = 1;
+ num_hosts++;
+
++ ret = scsi_add_host(host, &pdev->dev);
++ if (ret)
++ goto probe_failed;
++
++ qla2x00_alloc_sysfs_attr(ha);
++
++ qla2x00_init_host_attr(ha);
++
+ qla_printk(KERN_INFO, ha, "\n"
+ " QLogic Fibre Channel HBA Driver: %s\n"
+ " QLogic %s - %s\n"
+@@ -1384,9 +1387,6 @@ int qla2x00_probe_one(struct pci_dev *pd
+ probe_failed:
+ fc_remove_host(ha->host);
+
+- scsi_remove_host(host);
+-
+-probe_alloc_failed:
+ qla2x00_free_device(ha);
+
+ scsi_host_put(host);
+@@ -1394,7 +1394,8 @@ probe_alloc_failed:
+ probe_disable_device:
+ pci_disable_device(pdev);
+
+- return -1;
++probe_out:
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(qla2x00_probe_one);
+
+diff --git a/fs/bio.c b/fs/bio.c
+--- a/fs/bio.c
++++ b/fs/bio.c
+@@ -261,6 +261,7 @@ inline void __bio_clone(struct bio *bio,
+ */
+ bio->bi_vcnt = bio_src->bi_vcnt;
+ bio->bi_size = bio_src->bi_size;
++ bio->bi_idx = bio_src->bi_idx;
+ bio_phys_segments(q, bio);
+ bio_hw_segments(q, bio);
+ }
+diff --git a/fs/char_dev.c b/fs/char_dev.c
+--- a/fs/char_dev.c
++++ b/fs/char_dev.c
+@@ -139,7 +139,7 @@ __unregister_chrdev_region(unsigned majo
+ struct char_device_struct *cd = NULL, **cp;
+ int i = major_to_index(major);
+
+- up(&chrdevs_lock);
++ down(&chrdevs_lock);
+ for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
+ if ((*cp)->major == major &&
+ (*cp)->baseminor == baseminor &&
+diff --git a/fs/exec.c b/fs/exec.c
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -649,6 +649,7 @@ static inline int de_thread(struct task_
+ }
+ sig->group_exit_task = NULL;
+ sig->notify_count = 0;
++ sig->real_timer.data = (unsigned long)current;
+ spin_unlock_irq(lock);
+
+ /*
+diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
+--- a/fs/isofs/compress.c
++++ b/fs/isofs/compress.c
+@@ -129,8 +129,14 @@ static int zisofs_readpage(struct file *
+ cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
+ brelse(bh);
+
++ if (cstart > cend)
++ goto eio;
++
+ csize = cend-cstart;
+
++ if (csize > deflateBound(1UL << zisofs_block_shift))
++ goto eio;
++
+ /* Now page[] contains an array of pages, any of which can be NULL,
+ and the locks on which we hold. We should now read the data and
+ release the pages. If the pages are NULL the decompressed data
+diff --git a/include/asm-i386/string.h b/include/asm-i386/string.h
+--- a/include/asm-i386/string.h
++++ b/include/asm-i386/string.h
+@@ -116,7 +116,8 @@ __asm__ __volatile__(
+ "orb $1,%%al\n"
+ "3:"
+ :"=a" (__res), "=&S" (d0), "=&D" (d1)
+- :"1" (cs),"2" (ct));
++ :"1" (cs),"2" (ct)
++ :"memory");
+ return __res;
+ }
+
+@@ -138,8 +139,9 @@ __asm__ __volatile__(
+ "3:\tsbbl %%eax,%%eax\n\t"
+ "orb $1,%%al\n"
+ "4:"
+- :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+- :"1" (cs),"2" (ct),"3" (count));
++ :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
++ :"1" (cs),"2" (ct),"3" (count)
++ :"memory");
+ return __res;
+ }
+
+@@ -158,7 +160,9 @@ __asm__ __volatile__(
+ "movl $1,%1\n"
+ "2:\tmovl %1,%0\n\t"
+ "decl %0"
+- :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
++ :"=a" (__res), "=&S" (d0)
++ :"1" (s),"0" (c)
++ :"memory");
+ return __res;
+ }
+
+@@ -175,7 +179,9 @@ __asm__ __volatile__(
+ "leal -1(%%esi),%0\n"
+ "2:\ttestb %%al,%%al\n\t"
+ "jne 1b"
+- :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
++ :"=g" (__res), "=&S" (d0), "=&a" (d1)
++ :"0" (0),"1" (s),"2" (c)
++ :"memory");
+ return __res;
+ }
+
+@@ -189,7 +195,9 @@ __asm__ __volatile__(
+ "scasb\n\t"
+ "notl %0\n\t"
+ "decl %0"
+- :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffffu));
++ :"=c" (__res), "=&D" (d0)
++ :"1" (s),"a" (0), "0" (0xffffffffu)
++ :"memory");
+ return __res;
+ }
+
+@@ -333,7 +341,9 @@ __asm__ __volatile__(
+ "je 1f\n\t"
+ "movl $1,%0\n"
+ "1:\tdecl %0"
+- :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
++ :"=D" (__res), "=&c" (d0)
++ :"a" (c),"0" (cs),"1" (count)
++ :"memory");
+ return __res;
+ }
+
+@@ -369,7 +379,7 @@ __asm__ __volatile__(
+ "je 2f\n\t"
+ "stosb\n"
+ "2:"
+- : "=&c" (d0), "=&D" (d1)
++ :"=&c" (d0), "=&D" (d1)
+ :"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
+ :"memory");
+ return (s);
+@@ -392,7 +402,8 @@ __asm__ __volatile__(
+ "jne 1b\n"
+ "3:\tsubl %2,%0"
+ :"=a" (__res), "=&d" (d0)
+- :"c" (s),"1" (count));
++ :"c" (s),"1" (count)
++ :"memory");
+ return __res;
+ }
+ /* end of additional stuff */
+@@ -473,7 +484,8 @@ static inline void * memscan(void * addr
+ "dec %%edi\n"
+ "1:"
+ : "=D" (addr), "=c" (size)
+- : "0" (addr), "1" (size), "a" (c));
++ : "0" (addr), "1" (size), "a" (c)
++ : "memory");
+ return addr;
+ }
+
+diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
+--- a/include/asm-x86_64/smp.h
++++ b/include/asm-x86_64/smp.h
+@@ -46,6 +46,8 @@ extern int pic_mode;
+ extern int smp_num_siblings;
+ extern void smp_flush_tlb(void);
+ extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
++extern int smp_call_function_single (int cpuid, void (*func) (void *info),
void *info,
++ int retry, int wait);
+ extern void smp_send_reschedule(int cpu);
+ extern void smp_invalidate_rcv(void); /* Process an NMI */
+ extern void zap_low_mappings(void);
+diff --git a/include/linux/if_shaper.h b/include/linux/if_shaper.h
+--- a/include/linux/if_shaper.h
++++ b/include/linux/if_shaper.h
+@@ -23,7 +23,7 @@ struct shaper
+ __u32 shapeclock;
+ unsigned long recovery; /* Time we can next clock a packet out on
+ an empty queue */
+- struct semaphore sem;
++ spinlock_t lock;
+ struct net_device_stats stats;
+ struct net_device *dev;
+ int (*hard_start_xmit) (struct sk_buff *skb,
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1192,7 +1192,7 @@ static inline void *skb_header_pointer(c
+ {
+ int hlen = skb_headlen(skb);
+
+- if (offset + len <= hlen)
++ if (hlen - offset >= len)
+ return skb->data + offset;
+
+ if (skb_copy_bits(skb, offset, buffer, len) < 0)
+diff --git a/include/linux/zlib.h b/include/linux/zlib.h
+--- a/include/linux/zlib.h
++++ b/include/linux/zlib.h
+@@ -506,6 +506,11 @@ extern int zlib_deflateReset (z_streamp
+ stream state was inconsistent (such as zalloc or state being NULL).
+ */
+
++static inline unsigned long deflateBound(unsigned long s)
++{
++ return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
++}
++
+ extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
+ /*
+ Dynamically update the compression level and compression strategy. The
+diff --git a/kernel/module.c b/kernel/module.c
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -249,13 +249,18 @@ static inline unsigned int block_size(in
+ /* Created by linker magic */
+ extern char __per_cpu_start[], __per_cpu_end[];
+
+-static void *percpu_modalloc(unsigned long size, unsigned long align)
++static void *percpu_modalloc(unsigned long size, unsigned long align,
++ const char *name)
+ {
+ unsigned long extra;
+ unsigned int i;
+ void *ptr;
+
+- BUG_ON(align > SMP_CACHE_BYTES);
++ if (align > SMP_CACHE_BYTES) {
++ printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n",
++ name, align, SMP_CACHE_BYTES);
++ align = SMP_CACHE_BYTES;
++ }
+
+ ptr = __per_cpu_start;
+ for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
+@@ -347,7 +352,8 @@ static int percpu_modinit(void)
+ }
+ __initcall(percpu_modinit);
+ #else /* ... !CONFIG_SMP */
+-static inline void *percpu_modalloc(unsigned long size, unsigned long align)
++static inline void *percpu_modalloc(unsigned long size, unsigned long align,
++ const char *name)
+ {
+ return NULL;
+ }
+@@ -1554,7 +1560,8 @@ static struct module *load_module(void _
+ if (pcpuindex) {
+ /* We have a special allocation for this section. */
+ percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
+- sechdrs[pcpuindex].sh_addralign);
++ sechdrs[pcpuindex].sh_addralign,
++ mod->name);
+ if (!percpu) {
+ err = -ENOMEM;
+ goto free_mod;
+diff --git a/lib/inflate.c b/lib/inflate.c
+--- a/lib/inflate.c
++++ b/lib/inflate.c
+@@ -326,7 +326,7 @@ DEBG("huft1 ");
+ {
+ *t = (struct huft *)NULL;
+ *m = 0;
+- return 0;
++ return 2;
+ }
+
+ DEBG("huft2 ");
+@@ -374,6 +374,7 @@ DEBG("huft5 ");
+ if ((j = *p++) != 0)
+ v[x[j]++] = i;
+ } while (++i < n);
++ n = x[g]; /* set n to length of v */
+
+ DEBG("h6 ");
+
+@@ -410,12 +411,13 @@ DEBG1("1 ");
+ DEBG1("2 ");
+ f -= a + 1; /* deduct codes from patterns left */
+ xp = c + k;
+- while (++j < z) /* try smaller tables up to z bits */
+- {
+- if ((f <<= 1) <= *++xp)
+- break; /* enough codes to use up j bits */
+- f -= *xp; /* else deduct codes from patterns */
+- }
++ if (j < z)
++ while (++j < z) /* try smaller tables up to z bits */
++ {
++ if ((f <<= 1) <= *++xp)
++ break; /* enough codes to use up j bits */
++ f -= *xp; /* else deduct codes from patterns */
++ }
+ }
+ DEBG1("3 ");
+ z = 1 << j; /* table entries for j-bit table */
+diff --git a/lib/zlib_inflate/inftrees.c b/lib/zlib_inflate/inftrees.c
+--- a/lib/zlib_inflate/inftrees.c
++++ b/lib/zlib_inflate/inftrees.c
+@@ -141,7 +141,7 @@ static int huft_build(
+ {
+ *t = NULL;
+ *m = 0;
+- return Z_OK;
++ return Z_DATA_ERROR;
+ }
+
+
+diff --git a/mm/memory.c b/mm/memory.c
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1164,7 +1164,7 @@ int remap_pfn_range(struct vm_area_struc
+ {
+ pgd_t *pgd;
+ unsigned long next;
+- unsigned long end = addr + size;
++ unsigned long end = addr + PAGE_ALIGN(size);
+ struct mm_struct *mm = vma->vm_mm;
+ int err;
+
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -409,7 +409,7 @@ asmlinkage long sys_set_mempolicy(int mo
+ struct mempolicy *new;
+ DECLARE_BITMAP(nodes, MAX_NUMNODES);
+
+- if (mode > MPOL_MAX)
++ if (mode < 0 || mode > MPOL_MAX)
+ return -EINVAL;
+ err = get_nodes(nodes, nmask, maxnode, mode);
+ if (err)
+diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
+--- a/net/8021q/vlan.c
++++ b/net/8021q/vlan.c
+@@ -578,6 +578,14 @@ static int vlan_device_event(struct noti
+ if (!vlandev)
+ continue;
+
++ if (netif_carrier_ok(dev)) {
++ if (!netif_carrier_ok(vlandev))
++ netif_carrier_on(vlandev);
++ } else {
++ if (netif_carrier_ok(vlandev))
++ netif_carrier_off(vlandev);
++ }
++
+ if ((vlandev->state & VLAN_LINK_STATE_MASK) != flgs) {
+ vlandev->state = (vlandev->state &~
VLAN_LINK_STATE_MASK)
+ | flgs;
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -111,7 +111,6 @@ static int ip_dev_loopback_xmit(struct s
+ #ifdef CONFIG_NETFILTER_DEBUG
+ nf_debug_ip_loopback_xmit(newskb);
+ #endif
+- nf_reset(newskb);
+ netif_rx(newskb);
+ return 0;
+ }
+@@ -196,8 +195,6 @@ static inline int ip_finish_output2(stru
+ nf_debug_ip_finish_output2(skb);
+ #endif /*CONFIG_NETFILTER_DEBUG*/
+
+- nf_reset(skb);
+-
+ if (hh) {
+ int hh_alen;
+
+diff --git a/net/ipv4/netfilter/ip_conntrack_core.c
b/net/ipv4/netfilter/ip_conntrack_core.c
+--- a/net/ipv4/netfilter/ip_conntrack_core.c
++++ b/net/ipv4/netfilter/ip_conntrack_core.c
+@@ -1124,6 +1124,9 @@ void ip_conntrack_cleanup(void)
+ schedule();
+ goto i_see_dead_people;
+ }
++ /* wait until all references to ip_conntrack_untracked are dropped */
++ while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
++ schedule();
+
+ kmem_cache_destroy(ip_conntrack_cachep);
+ kmem_cache_destroy(ip_conntrack_expect_cachep);
+diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c
b/net/ipv4/netfilter/ip_conntrack_standalone.c
+--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
++++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
+@@ -432,6 +432,13 @@ static unsigned int ip_conntrack_defrag(
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
++ /* Previously seen (loopback)? Ignore. Do this before
++ fragment check. */
++ if ((*pskb)->nfct)
++ return NF_ACCEPT;
++#endif
++
+ /* Gather fragments. */
+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ *pskb = ip_ct_gather_frags(*pskb,
+diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c
b/net/ipv4/netfilter/ip_nat_proto_tcp.c
+--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
++++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
+@@ -40,7 +40,8 @@ tcp_unique_tuple(struct ip_conntrack_tup
+ enum ip_nat_manip_type maniptype,
+ const struct ip_conntrack *conntrack)
+ {
+- static u_int16_t port, *portptr;
++ static u_int16_t port;
++ u_int16_t *portptr;
+ unsigned int range_size, min, i;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c
b/net/ipv4/netfilter/ip_nat_proto_udp.c
+--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
++++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
+@@ -41,7 +41,8 @@ udp_unique_tuple(struct ip_conntrack_tup
+ enum ip_nat_manip_type maniptype,
+ const struct ip_conntrack *conntrack)
+ {
+- static u_int16_t port, *portptr;
++ static u_int16_t port;
++ u_int16_t *portptr;
+ unsigned int range_size, min, i;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
+--- a/net/ipv6/netfilter/ip6_queue.c
++++ b/net/ipv6/netfilter/ip6_queue.c
+@@ -76,7 +76,9 @@ static DECLARE_MUTEX(ipqnl_sem);
+ static void
+ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+ {
++ local_bh_disable();
+ nf_reinject(entry->skb, entry->info, verdict);
++ local_bh_enable();
+ kfree(entry);
+ }
+
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -315,8 +315,8 @@ err:
+ static void netlink_remove(struct sock *sk)
+ {
+ netlink_table_grab();
+- nl_table[sk->sk_protocol].hash.entries--;
+- sk_del_node_init(sk);
++ if (sk_del_node_init(sk))
++ nl_table[sk->sk_protocol].hash.entries--;
+ if (nlk_sk(sk)->groups)
+ __sk_del_bind_node(sk);
+ netlink_table_ungrab();
+@@ -429,7 +429,12 @@ retry:
+ err = netlink_insert(sk, pid);
+ if (err == -EADDRINUSE)
+ goto retry;
+- return 0;
++
++ /* If 2 threads race to autobind, that is fine. */
++ if (err == -EBUSY)
++ err = 0;
++
++ return err;
+ }
+
+ static inline int netlink_capable(struct socket *sock, unsigned int flag)
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -274,6 +274,9 @@ static int packet_rcv_spkt(struct sk_buf
+ dst_release(skb->dst);
+ skb->dst = NULL;
+
++ /* drop conntrack reference */
++ nf_reset(skb);
++
+ spkt = (struct sockaddr_pkt*)skb->cb;
+
+ skb_push(skb, skb->data-skb->mac.raw);
+@@ -517,6 +520,9 @@ static int packet_rcv(struct sk_buff *sk
+ dst_release(skb->dst);
+ skb->dst = NULL;
+
++ /* drop conntrack reference */
++ nf_reset(skb);
++
+ spin_lock(&sk->sk_receive_queue.lock);
+ po->stats.tp_packets++;
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
+--- a/net/xfrm/xfrm_user.c
++++ b/net/xfrm/xfrm_user.c
+@@ -1180,6 +1180,9 @@ static struct xfrm_policy *xfrm_compile_
+ if (nr > XFRM_MAX_DEPTH)
+ return NULL;
+
++ if (p->dir > XFRM_POLICY_OUT)
++ return NULL;
++
+ xp = xfrm_policy_alloc(GFP_KERNEL);
+ if (xp == NULL) {
+ *dir = -ENOBUFS;
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -188,7 +188,11 @@ static void keyring_destroy(struct key *
+
+ if (keyring->description) {
+ write_lock(&keyring_name_lock);
+- list_del(&keyring->type_data.link);
++
++ if (keyring->type_data.link.next != NULL &&
++ !list_empty(&keyring->type_data.link))
++ list_del(&keyring->type_data.link);
++
+ write_unlock(&keyring_name_lock);
+ }
+
+diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -641,7 +641,7 @@ long join_session_keyring(const char *na
+ keyring = keyring_alloc(name, tsk->uid, tsk->gid, 0, NULL);
+ if (IS_ERR(keyring)) {
+ ret = PTR_ERR(keyring);
+- goto error;
++ goto error2;
+ }
+ }
+ else if (IS_ERR(keyring)) {
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/parallax/Makefile
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/parallax/Makefile Thu Aug 25 22:53:20 2005
@@ -0,0 +1,64 @@
+# Builds parallax, the vdi_* helper tools and blockstored.
+XEN_ROOT = ../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+PARALLAX_INSTALL_DIR = /usr/sbin
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
+
+# Carries the library list as well as the search path, so it has to come
+# after the sources on every link line below.
+LDFLAGS = -L.. -lpthread -lz -lblktap
+
+# Sources shared by parallax and by every vdi_* tool.
+PLX_SRCS := vdi.c
+PLX_SRCS += radix.c
+PLX_SRCS += snaplog.c
+PLX_SRCS += blockstore.c
+PLX_SRCS += block-async.c
+PLX_SRCS += requests-async.c
+VDI_SRCS := $(PLX_SRCS)
+PLX_SRCS += parallax.c
+
+# Each tool is built from <tool>.c plus $(VDI_SRCS).
+VDI_TOOLS := vdi_create
+VDI_TOOLS += vdi_list
+VDI_TOOLS += vdi_snap
+VDI_TOOLS += vdi_snap_list
+VDI_TOOLS += vdi_snap_delete
+VDI_TOOLS += vdi_fill
+VDI_TOOLS += vdi_tree
+VDI_TOOLS += vdi_validate
+
+CFLAGS += -Wall
+CFLAGS += -Werror
+CFLAGS += -Wno-unused
+#CFLAGS += -O3
+CFLAGS += -g3
+CFLAGS += -fno-strict-aliasing
+CFLAGS += $(INCLUDES)
+CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
+# Get gcc to generate the dependencies for us.
+CFLAGS += -Wp,-MD,.$(@F).d
+DEPS = .*.d
+
+# NOTE(review): SRCS is not assigned in this file, so OBJS is empty unless
+# Rules.mk provides SRCS - confirm whether OBJS is still needed.
+OBJS = $(patsubst %.c,%.o,$(SRCS))
+IBINS = parallax $(VDI_TOOLS)
+
+# No rule for blockstored appears in this file; presumably it comes from an
+# implicit rule or from Rules.mk - confirm.
+all: $(VDI_TOOLS) parallax blockstored
+
+# Create the destination first so a fresh $(DESTDIR) works.
+install: all
+	$(INSTALL_DIR) -p $(DESTDIR)$(PARALLAX_INSTALL_DIR)
+	$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
+
+clean:
+	rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax blockstored vdi_unittest
+
+parallax: $(PLX_SRCS)
+	$(CC) $(CFLAGS) -o parallax $(PLX_SRCS) $(LDFLAGS)
+
+${VDI_TOOLS}: %: %.c $(VDI_SRCS)
+	$(CC) $(CFLAGS) -g3 -o $@ $@.c $(VDI_SRCS) $(LDFLAGS)
+
+.PHONY: all TAGS clean install rpm
+-include $(DEPS)
\ No newline at end of file
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/Makefile
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/Makefile Thu Aug 25 22:53:20 2005
@@ -0,0 +1,39 @@
+
+# Builds the console daemon (xenconsoled) and its client (xenconsole).
+XEN_ROOT=../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+DAEMON_INSTALL_DIR = /usr/sbin
+CLIENT_INSTALL_DIR = /usr/libexec/xen
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+# NOTE(review): these two assignments replace whatever CC/CFLAGS Rules.mk
+# set up (e.g. for a cross build) - confirm that is intended.
+CC = gcc
+CFLAGS = -Wall -Werror -g3
+
+CFLAGS += -I $(XEN_XCS)
+CFLAGS += -I $(XEN_LIBXC)
+CFLAGS += -I $(XEN_XENSTORE)
+
+BIN = xenconsoled xenconsole
+
+# None of these names a file; keep a stray file called "clean" or "install"
+# from silently disabling the target.
+.PHONY: all clean install
+
+all: $(BIN)
+
+clean:
+	$(RM) *.a *.so *.o *.rpm $(BIN)
+	$(RM) client/*.o daemon/*.o
+
+# Each program links every object built from its own subdirectory.
+xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c))
+	$(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \
+		-lxenctrl -lxenstore
+
+xenconsole: $(patsubst %.c,%.o,$(wildcard client/*.c))
+	$(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \
+		-lxenctrl -lxenstore
+
+install: $(BIN)
+	$(INSTALL_DIR) -p $(DESTDIR)/$(DAEMON_INSTALL_DIR)
+	$(INSTALL_PROG) xenconsoled $(DESTDIR)/$(DAEMON_INSTALL_DIR)
+	$(INSTALL_DIR) -p $(DESTDIR)/$(CLIENT_INSTALL_DIR)
+	$(INSTALL_PROG) xenconsole $(DESTDIR)/$(CLIENT_INSTALL_DIR)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/client/main.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/client/main.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,236 @@
+/*\
+ * Copyright (C) International Business Machines Corp., 2005
+ * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ * Xen Console Client
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+\*/
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <time.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <termios.h>
+#include <signal.h>
+#include <getopt.h>
+#include <sys/select.h>
+#include <err.h>
+#include <errno.h>
+#include <pty.h>
+
+#include "xenctrl.h"
+#include "xs.h"
+
+#define ESCAPE_CHARACTER 0x1d
+
+/* Raised to 1 by sighandler(); console_loop() stops once it is non-zero. */
+static volatile sig_atomic_t received_signal = 0;
+
+/* Signal handler: only records that a signal was delivered. */
+static void sighandler(int signum)
+{
+	(void)signum;	/* the signal number itself is not inspected */
+	received_signal = 1;
+}
+
+/*
+ * Write all `size` bytes of `data` to `fd`, calling write() again after a
+ * short write.  Returns true once everything has been written (immediately
+ * when size is 0) and false as soon as one write() call returns less than
+ * one byte.
+ */
+static bool write_sync(int fd, const void *data, size_t size)
+{
+	const char *bytes = data;
+	size_t done;
+	ssize_t written;
+
+	for (done = 0; done < size; done += written) {
+		written = write(fd, bytes + done, size - done);
+		if (written < 1)
+			return false;
+	}
+
+	return true;
+}
+
+/* Print the command-line synopsis for `program` on stdout. */
+static void usage(const char *program) {
+	printf("Usage: %s [OPTION] DOMID\n", program);
+	fputs("Attaches to a virtual domain console\n"
+	      "\n"
+	      " -h, --help display this help and exit\n",
+	      stdout);
+}
+
+/*
+ * Save the current terminal attributes of `fd` in `*old` and switch the
+ * terminal to raw mode.  Best effort: if the attributes cannot be read the
+ * terminal is left alone, and a failing tcsetattr() is not reported.
+ */
+static void init_term(int fd, struct termios *old)
+{
+	struct termios raw;
+
+	if (tcgetattr(fd, old) != -1) {
+		raw = *old;
+		cfmakeraw(&raw);
+		tcsetattr(fd, TCSAFLUSH, &raw);
+	}
+}
+
+/* Re-apply the attributes in `*old` to `fd`; a failure is ignored. */
+static void restore_term(int fd, struct termios *old)
+{
+	(void)tcsetattr(fd, TCSAFLUSH, old);
+}
+
+/*
+ * Shuttle bytes between the local terminal and the console pty `fd`:
+ * stdin is forwarded to `fd`, and data read from `fd` goes to stdout.
+ *
+ * Returns 0 when a read from stdin yields exactly the single byte
+ * ESCAPE_CHARACTER, or when received_signal is found set at the end of an
+ * iteration.  Returns -1 on end-of-file or on any error other than
+ * EINTR/EAGAIN.  `xc_handle` and `domid` are not used here.
+ */
+static int console_loop(int xc_handle, domid_t domid, int fd)
+{
+	do {
+		fd_set readable;
+		ssize_t got;
+
+		FD_ZERO(&readable);
+		FD_SET(STDIN_FILENO, &readable);
+		FD_SET(fd, &readable);
+
+		if (select(fd + 1, &readable, NULL, NULL, NULL) == -1) {
+			if (errno != EINTR && errno != EAGAIN)
+				return -1;
+			/* re-check received_signal, then wait again */
+			continue;
+		}
+
+		/* keyboard -> console */
+		if (FD_ISSET(STDIN_FILENO, &readable)) {
+			char keys[60];
+
+			got = read(STDIN_FILENO, keys, sizeof(keys));
+			if (got == 1 && keys[0] == ESCAPE_CHARACTER)
+				return 0;
+
+			if (got < 1) {
+				if (got == -1 &&
+				    (errno == EINTR || errno == EAGAIN))
+					continue;
+				return -1;
+			}
+
+			if (!write_sync(fd, keys, got)) {
+				perror("write() failed");
+				return -1;
+			}
+		}
+
+		/* console -> screen */
+		if (FD_ISSET(fd, &readable)) {
+			char output[512];
+
+			got = read(fd, output, sizeof(output));
+			if (got < 1) {
+				if (got == -1 &&
+				    (errno == EINTR || errno == EAGAIN))
+					continue;
+				return -1;
+			}
+
+			if (!write_sync(STDOUT_FILENO, output, got)) {
+				perror("write() failed");
+				return -1;
+			}
+		}
+	} while (received_signal == 0);
+
+	return 0;
+}
+
+/*
+ * xenconsole entry point.
+ *
+ * Parses "[-h|--help] DOMID", looks up the pty path stored under
+ * /console/<domid>/tty in the store, opens it, puts the local terminal in
+ * raw mode and relays data with console_loop() until it returns, then
+ * restores the terminal.
+ *
+ * Exits with EINVAL on a usage error and with a non-zero status when the
+ * store, the hypervisor interface or the tty cannot be reached.  Returns 0
+ * once console_loop() has finished; its result is not propagated.
+ */
+int main(int argc, char **argv)
+{
+	struct termios attr;
+	int domid;
+	int xc_handle;
+	char *sopt = "h";
+	int ch;
+	int opt_ind=0;
+	struct option lopt[] = {
+		{ "help", 0, 0, 'h' },
+		{ 0 },
+	};
+	char *str_pty;
+	char path[1024];
+	int spty;
+	unsigned int len = 0;
+	struct xs_handle *xs;
+	char *end;
+
+	while((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
+		switch(ch) {
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+			break;
+		}
+	}
+
+	if ((argc - optind) != 1) {
+		fprintf(stderr, "Invalid number of arguments\n");
+		fprintf(stderr, "Try `%s --help' for more information.\n",
+			argv[0]);
+		exit(EINVAL);
+	}
+
+	domid = strtol(argv[optind], &end, 10);
+	/* Reject trailing junk and also an argument with no digits at all. */
+	if (end == argv[optind] || *end) {
+		fprintf(stderr, "Invalid DOMID `%s'\n", argv[optind]);
+		fprintf(stderr, "Try `%s --help' for more information.\n",
+			argv[0]);
+		exit(EINVAL);
+	}
+
+	xs = xs_daemon_open();
+	if (xs == NULL) {
+		err(errno, "Could not contact XenStore");
+	}
+
+	xc_handle = xc_interface_open();
+	if (xc_handle == -1) {
+		err(errno, "xc_interface_open()");
+	}
+
+	signal(SIGTERM, sighandler);
+
+	snprintf(path, sizeof(path), "/console/%d/tty", domid);
+	str_pty = xs_read(xs, path, &len);
+	/* FIXME: the console daemon does not special-case domain-0, which is
+	   good for when domain-0 gets broken up.  To stay user friendly we
+	   bail out here, since no data will ever show up for domain-0. */
+	if (domid == 0) {
+		/* Not a system-call failure: errno is meaningless here and
+		   may be 0, which would make err() exit with success. */
+		errx(EINVAL, "Could not read tty from store");
+	}
+	if (str_pty == NULL) {
+		if (errno == 0) {
+			/* never report a failure with exit status 0 */
+			errx(ENOENT, "Could not read tty from store");
+		}
+		err(errno, "Could not read tty from store");
+	}
+	spty = open(str_pty, O_RDWR | O_NOCTTY);
+	if (spty == -1) {
+		err(errno, "Could not open tty `%s'", str_pty);
+	}
+	free(str_pty);
+
+	init_term(STDIN_FILENO, &attr);
+	console_loop(xc_handle, domid, spty);
+	restore_term(STDIN_FILENO, &attr);
+
+	return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/io.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/io.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,362 @@
+/*\
+ * Copyright (C) International Business Machines Corp., 2005
+ * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ * Xen Console Daemon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+\*/
+
+#define _GNU_SOURCE
+
+#include "utils.h"
+#include "io.h"
+
+#include "xenctrl.h"
+#include "xs.h"
+#include "xen/io/domain_controller.h"
+#include "xcs_proto.h"
+
+#include <malloc.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/select.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <termios.h>
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+/*
+ * Growable byte buffer; each struct domain embeds one to hold console
+ * data received from xcs until it can be written to the domain's tty.
+ */
+struct buffer
+{
+ char *data; /* heap storage, grown with realloc(); 0 before first append */
+ size_t size; /* number of bytes currently stored in data */
+ size_t capacity; /* number of bytes allocated for data */
+ size_t max_capacity; /* size limit checked by buffer_append(); 0 = no limit */
+};
+
+/*
+ * Append `size` bytes from `data` to `buffer`, growing the storage when
+ * needed.  The process exits with ENOMEM if the storage cannot be grown.
+ *
+ * When the buffer has a non-zero max_capacity and the append pushes it past
+ * that limit, the oldest bytes are dropped so that only the most recent
+ * max_capacity bytes remain.
+ */
+static void buffer_append(struct buffer *buffer, const void *data, size_t size)
+{
+	if (size == 0)
+		return;
+
+	if ((buffer->capacity - buffer->size) < size) {
+		/* grow with 1024 bytes of slack to limit reallocations */
+		buffer->capacity += (size + 1024);
+		buffer->data = realloc(buffer->data, buffer->capacity);
+		if (buffer->data == NULL) {
+			dolog(LOG_ERR, "Memory allocation failed");
+			exit(ENOMEM);
+		}
+	}
+
+	memcpy(buffer->data + buffer->size, data, size);
+	buffer->size += size;
+
+	if (buffer->max_capacity &&
+	    buffer->size > buffer->max_capacity) {
+		size_t excess = buffer->size - buffer->max_capacity;
+		char *shrunk;
+
+		/* Slide the newest max_capacity bytes to the front ... */
+		memmove(buffer->data, buffer->data + excess,
+			buffer->max_capacity);
+		/*
+		 * ... and clamp size with them: a size left above capacity
+		 * would make the free-space computation above wrap around
+		 * and the next memcpy() run off the end of the allocation.
+		 */
+		buffer->size = buffer->max_capacity;
+
+		/* If shrinking fails the larger block simply stays in use. */
+		shrunk = realloc(buffer->data, buffer->max_capacity);
+		if (shrunk != NULL) {
+			buffer->data = shrunk;
+			buffer->capacity = buffer->max_capacity;
+		}
+	}
+}
+
+/* True when `buffer` currently stores no bytes. */
+static bool buffer_empty(struct buffer *buffer)
+{
+	return !buffer->size;
+}
+
+/*
+ * Discard the first `size` bytes of `buffer` (all of them when `size`
+ * exceeds what is stored) and move the remainder to the front.
+ */
+static void buffer_advance(struct buffer *buffer, size_t size)
+{
+	size = MIN(size, buffer->size);
+	if (size == 0)
+		return;
+
+	/*
+	 * The source is the stored data past the consumed prefix.  It must be
+	 * computed from buffer->data: offsetting the struct pointer itself
+	 * would copy unrelated memory.
+	 */
+	memmove(buffer->data, buffer->data + size, buffer->size - size);
+	buffer->size -= size;
+}
+
+/*
+ * Per-domain console state; one node of the singly linked list headed by
+ * dom_head.
+ */
+struct domain
+{
+ int domid; /* domain id; also the sort key of the list */
+ int tty_fd; /* value returned by domain_create_tty(): pty master fd, or -1 */
+ bool is_dead; /* set by the tty handlers; reaped by remove_dead_domains() */
+ struct buffer buffer; /* data from xcs waiting to be written to tty_fd */
+ struct domain *next; /* next node, or 0 at the end of the list */
+};
+
+/* Head of the domain list; lookup_domain() inserts in ascending domid order. */
+static struct domain *dom_head;
+
+/*
+ * True when xc_domain_getinfo(), asked for one entry starting at `domid`,
+ * returns exactly one entry and that entry's domid equals `domid`.
+ * NOTE(review): the domid comparison presumably guards against the call
+ * returning a later domain when `domid` itself is gone - confirm.
+ */
+static bool domain_is_valid(int domid)
+{
+	xc_dominfo_t info;
+
+	if (xc_domain_getinfo(xc, domid, 1, &info) != 1)
+		return false;
+
+	return info.domid == domid;
+}
+
+/*
+ * Allocate a pseudo-terminal for `dom`, switch it to raw mode and publish
+ * the slave device name in the store under /console/<domid>/tty.  If the
+ * store holds /console/<domid>/limit, its value is parsed with strtoul()
+ * and stored in dom->buffer.max_capacity.
+ *
+ * Returns the master fd, or -1 (after logging) when the pty cannot be set
+ * up; in that case no descriptor is left open.
+ */
+static int domain_create_tty(struct domain *dom)
+{
+	char path[1024];
+	const char *slave;
+	struct termios term;
+	char *data;
+	unsigned int len;
+	int master;
+
+	master = getpt();
+	if (master == -1)
+		goto fail;
+
+	if (grantpt(master) == -1 || unlockpt(master) == -1)
+		goto fail_close;
+
+	/* ptsname() can fail; never hand NULL to strlen() below */
+	slave = ptsname(master);
+	if (slave == NULL)
+		goto fail_close;
+
+	/* raw mode is best effort: failures here are ignored */
+	if (tcgetattr(master, &term) != -1) {
+		cfmakeraw(&term);
+		tcsetattr(master, TCSAFLUSH, &term);
+	}
+
+	xs_mkdir(xs, "/console");
+	snprintf(path, sizeof(path), "/console/%d", dom->domid);
+	xs_mkdir(xs, path);
+	strcat(path, "/tty");
+
+	xs_write(xs, path, slave, strlen(slave), O_CREAT);
+
+	snprintf(path, sizeof(path), "/console/%d/limit", dom->domid);
+	data = xs_read(xs, path, &len);
+	if (data) {
+		dom->buffer.max_capacity = strtoul(data, 0, 0);
+		free(data);
+	}
+
+	return master;
+
+fail_close:
+	/* the master was opened but is unusable: do not leak it */
+	close(master);
+fail:
+	dolog(LOG_ERR, "Failed to create tty for domain-%d",
+	      dom->domid);
+	return -1;
+}
+
+/*
+ * Allocate and initialise the state for domain `domid`, including its tty.
+ * The node is returned unlinked (next == 0); the caller inserts it into
+ * the list.  Exits with ENOMEM if the allocation fails.
+ */
+static struct domain *create_domain(int domid)
+{
+	struct domain *dom;
+
+	dom = (struct domain *)malloc(sizeof(struct domain));
+	if (dom == NULL) {
+		dolog(LOG_ERR, "Out of memory %s:%s():L%d",
+		      __FILE__, __FUNCTION__, __LINE__);
+		exit(ENOMEM);
+	}
+
+	dom->domid = domid;
+	dom->is_dead = false;
+	dom->buffer.data = 0;
+	dom->buffer.size = 0;
+	dom->buffer.capacity = 0;
+	dom->buffer.max_capacity = 0;
+	dom->next = 0;
+
+	/*
+	 * Create the tty only after the defaults are in place:
+	 * domain_create_tty() may load buffer.max_capacity from the store,
+	 * and resetting the field afterwards would throw that limit away.
+	 */
+	dom->tty_fd = domain_create_tty(dom);
+
+	dolog(LOG_DEBUG, "New domain %d", domid);
+
+	return dom;
+}
+
+/*
+ * Return the list node for `domid`, creating it when it does not exist.
+ * A new node is linked in front of the first node with a larger domid, or
+ * at the tail when there is none, which keeps the list in ascending order.
+ */
+static struct domain *lookup_domain(int domid)
+{
+	struct domain **link = &dom_head;
+	struct domain *fresh;
+
+	/* skip every node that sorts before domid */
+	while (*link && (*link)->domid < domid)
+		link = &(*link)->next;
+
+	if (*link && (*link)->domid == domid)
+		return *link;
+
+	fresh = create_domain(domid);
+	fresh->next = *link;
+	*link = fresh;
+
+	return fresh;
+}
+
+/*
+ * Unlink the first list node whose domid equals dom->domid and free it
+ * together with its buffer storage.  The tty descriptor is not closed here.
+ */
+static void remove_domain(struct domain *dom)
+{
+	struct domain **link;
+	struct domain *victim;
+
+	dolog(LOG_DEBUG, "Removing domain-%d", dom->domid);
+
+	link = &dom_head;
+	while ((victim = *link) != NULL) {
+		if (victim->domid == dom->domid) {
+			*link = victim->next;
+			free(victim->buffer.data);	/* free(NULL) is a no-op */
+			free(victim);
+			return;
+		}
+		link = &victim->next;
+	}
+}
+
+/*
+ * Remove every node marked is_dead from `dom` to the end of the list.
+ * The tail is handled first, so a node is only freed after everything
+ * behind it has already been visited.
+ */
+static void remove_dead_domains(struct domain *dom)
+{
+	if (dom != NULL) {
+		remove_dead_domains(dom->next);
+
+		if (dom->is_dead)
+			remove_domain(dom);
+	}
+}
+
+/*
+ * Read up to 60 bytes typed on the domain's tty and forward them to xcs as
+ * a CMSG_CONSOLE / CMSG_CONSOLE_DATA request for that domain.
+ *
+ * If the domain is no longer valid the tty is closed and the domain marked
+ * dead.  If the read yields nothing but the domain is still valid, the tty
+ * is closed and created afresh.  A failed write to xcs terminates the
+ * daemon.
+ */
+static void handle_tty_read(struct domain *dom)
+{
+	xcs_msg_t msg;
+	ssize_t got;
+	bool alive;
+
+	got = read(dom->tty_fd, msg.u.control.msg.msg, 60);
+	alive = domain_is_valid(dom->domid);
+
+	if (got >= 1 && alive) {
+		msg.type = XCS_REQUEST;
+		msg.u.control.remote_dom = dom->domid;
+		msg.u.control.msg.type = CMSG_CONSOLE;
+		msg.u.control.msg.subtype = CMSG_CONSOLE_DATA;
+		msg.u.control.msg.id = 1;
+		msg.u.control.msg.length = got;
+
+		if (!write_sync(xcs_data_fd, &msg, sizeof(msg))) {
+			dolog(LOG_ERR, "Write to xcs failed: %m");
+			exit(1);
+		}
+		return;
+	}
+
+	/* Either the read failed or the domain is gone: drop this tty. */
+	close(dom->tty_fd);
+
+	if (alive)
+		dom->tty_fd = domain_create_tty(dom);
+	else
+		dom->is_dead = true;
+}
+
+/*
+ * Write as much buffered console data as the tty accepts and drop the
+ * written prefix from the buffer.  When the write returns less than one
+ * byte the tty is closed; it is created afresh if the domain is still
+ * valid, otherwise the domain is marked dead.
+ */
+static void handle_tty_write(struct domain *dom)
+{
+	ssize_t written;
+
+	written = write(dom->tty_fd, dom->buffer.data, dom->buffer.size);
+	if (written >= 1) {
+		buffer_advance(&dom->buffer, written);
+		return;
+	}
+
+	close(dom->tty_fd);
+
+	if (!domain_is_valid(dom->domid))
+		dom->is_dead = true;
+	else
+		dom->tty_fd = domain_create_tty(dom);
+}
+
+/*
+ * Read one xcs message from `fd`.  For an XCS_REQUEST message the payload
+ * is queued on the buffer of the domain it names (the domain entry is
+ * created if needed); other message types are ignored.  A failed read
+ * terminates the daemon.
+ * NOTE(review): the length field of the message is used as received,
+ * without a bounds check against the payload array - confirm xcs
+ * guarantees it.
+ */
+static void handle_xcs_msg(int fd)
+{
+	xcs_msg_t msg;
+	struct domain *dom;
+
+	if (!read_sync(fd, &msg, sizeof(msg))) {
+		dolog(LOG_ERR, "read from xcs failed! %m");
+		exit(1);
+	}
+
+	if (msg.type != XCS_REQUEST)
+		return;
+
+	dom = lookup_domain(msg.u.control.remote_dom);
+	buffer_append(&dom->buffer,
+		      msg.u.control.msg.msg,
+		      msg.u.control.msg.length);
+}
+
+/*
+ * Walk the domains reported by xc_domain_getinfo(), starting at id 0 and
+ * continuing after each reported id, and make sure each one has an entry
+ * in the domain list.
+ */
+static void enum_domains(void)
+{
+	xc_dominfo_t dominfo;
+	int next_id;
+
+	for (next_id = 0;
+	     xc_domain_getinfo(xc, next_id, 1, &dominfo) == 1;
+	     next_id = dominfo.domid + 1)
+		lookup_domain(dominfo.domid);
+}
+
+/*
+ * Main loop of the console daemon.
+ *
+ * Each iteration waits up to one second for the xcs data socket to become
+ * readable or for a domain tty to become readable (or writable, when data
+ * is queued for it), refreshes the domain list, services whatever is
+ * ready, and finally frees domains that were marked dead.
+ *
+ * Returns when select() fails.
+ */
+void handle_io(void)
+{
+	fd_set readfds, writefds;
+	int ret;
+	int max_fd = -1;
+
+	for (;;) {
+		struct domain *d;
+		struct timeval tv = { 1, 0 };
+
+		FD_ZERO(&readfds);
+		FD_ZERO(&writefds);
+
+		FD_SET(xcs_data_fd, &readfds);
+		max_fd = MAX(xcs_data_fd, max_fd);
+
+		for (d = dom_head; d; d = d->next) {
+			if (d->tty_fd == -1)
+				continue;
+
+			FD_SET(d->tty_fd, &readfds);
+			if (!buffer_empty(&d->buffer))
+				FD_SET(d->tty_fd, &writefds);
+
+			max_fd = MAX(d->tty_fd, max_fd);
+		}
+
+		ret = select(max_fd + 1, &readfds, &writefds, 0, &tv);
+		if (ret == -1) {
+			/*
+			 * The fd sets still hold what was requested, not
+			 * what is ready; acting on them could block in
+			 * read_sync().  Leave the loop right away.
+			 */
+			break;
+		}
+
+		/*
+		 * FIXME: xcs does not cope well with the control channels
+		 * filling up.  A self-throttling hack used to sit here,
+		 * compiled out; it was not worth fixing in xcs as xcs is
+		 * going away.
+		 */
+
+		enum_domains();
+
+		if (FD_ISSET(xcs_data_fd, &readfds)) {
+			handle_xcs_msg(xcs_data_fd);
+		}
+
+		for (d = dom_head; d; d = d->next) {
+			/*
+			 * tty_fd is re-checked before each FD_ISSET(): it is
+			 * -1 when tty creation failed, and handle_tty_read()
+			 * may replace it.
+			 */
+			if (!d->is_dead && d->tty_fd != -1 &&
+			    FD_ISSET(d->tty_fd, &readfds)) {
+				handle_tty_read(d);
+			}
+
+			if (!d->is_dead && d->tty_fd != -1 &&
+			    FD_ISSET(d->tty_fd, &writefds)) {
+				handle_tty_write(d);
+			}
+		}
+
+		remove_dead_domains(dom_head);
+	}
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/io.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/io.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,26 @@
+/*\
+ * Copyright (C) International Business Machines Corp., 2005
+ * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ * Xen Console Daemon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+\*/
+
+#ifndef CONSOLED_IO_H
+#define CONSOLED_IO_H
+
+void handle_io(void);
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/main.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/main.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,93 @@
+/*\
+ * Copyright (C) International Business Machines Corp., 2005
+ * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ * Xen Console Daemon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+\*/
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include "xenctrl.h"
+#include "xen/io/domain_controller.h"
+#include "xcs_proto.h"
+
+#include "utils.h"
+#include "io.h"
+
+/*
+ * Entry point of xenconsoled.
+ *
+ * Options: -h/--help and -V/--version exit immediately (their output is
+ * still commented out), -v/--verbose also copies log messages to stderr and
+ * lowers the log mask to LOG_DEBUG, -i/--interactive skips daemonizing.
+ *
+ * Must run with effective uid 0.  Exits with EINVAL on an unknown option,
+ * EPERM when not root and 1 when the Xen connections cannot be set up.
+ */
+int main(int argc, char **argv)
+{
+	const char *sopts = "hVvi";
+	struct option lopts[] = {
+		{ "help", 0, 0, 'h' },
+		{ "version", 0, 0, 'V' },
+		{ "verbose", 0, 0, 'v' },
+		{ "interactive", 0, 0, 'i' },
+		{ 0 },
+	};
+	bool is_interactive = false;
+	int ch;
+	int syslog_option = LOG_CONS;
+	int syslog_mask = LOG_WARNING;
+	int opt_ind = 0;
+
+	while ((ch = getopt_long(argc, argv, sopts, lopts, &opt_ind)) != -1) {
+		switch (ch) {
+		case 'h':
+			//usage(argv[0]);
+			exit(0);
+		case 'V':
+			//version(argv[0]);
+			exit(0);
+		case 'v':
+			syslog_option |= LOG_PERROR;
+			syslog_mask = LOG_DEBUG;
+			break;
+		case 'i':
+			is_interactive = true;
+			break;
+		case '?':
+			fprintf(stderr,
+				"Try `%s --help' for more information\n",
+				argv[0]);
+			exit(EINVAL);
+		}
+	}
+
+	if (geteuid() != 0) {
+		fprintf(stderr, "%s requires root to run.\n", argv[0]);
+		exit(EPERM);
+	}
+
+	openlog("xenconsoled", syslog_option, LOG_DAEMON);
+	setlogmask(syslog_mask);
+
+	if (!is_interactive) {
+		daemonize("/var/run/xenconsoled.pid");
+	}
+
+	/* Without working connections handle_io() would select() on
+	   descriptors that are still -1, so give up here.  xen_setup() logs
+	   the reason for the failure itself. */
+	if (!xen_setup()) {
+		closelog();
+		exit(1);
+	}
+
+	handle_io();
+
+	closelog();
+
+	return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/utils.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/utils.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,253 @@
+/*\
+ * Copyright (C) International Business Machines Corp., 2005
+ * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ * Xen Console Daemon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+\*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <string.h>
+
+#include "xenctrl.h"
+#include "xen/io/domain_controller.h"
+#include "xcs_proto.h"
+
+#include "utils.h"
+
+struct xs_handle *xs;
+int xc;
+
+int xcs_ctrl_fd = -1;
+int xcs_data_fd = -1;
+
+/*
+ * Transfer exactly `size` bytes between `fd` and `data`, looping over short
+ * reads and writes.
+ *
+ * fd      - descriptor to read from or write to
+ * data    - destination (do_read) or source (!do_read) buffer
+ * size    - number of bytes that must be transferred
+ * do_read - true to read(), false to write()
+ *
+ * Returns true once all bytes were moved.  Returns false on end of file, on
+ * a zero-length write, or on any error other than EAGAIN/EINTR, which are
+ * retried immediately.
+ */
+bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
+{
+	/* Byte pointer: arithmetic on void * is a GNU extension. */
+	char *bytes = data;
+	size_t offset = 0;
+	ssize_t len;
+
+	while (offset < size) {
+		if (do_read) {
+			len = read(fd, bytes + offset, size - offset);
+		} else {
+			len = write(fd, bytes + offset, size - offset);
+		}
+
+		if (len > 0) {
+			offset += len;
+		} else if (len == -1 && (errno == EAGAIN || errno == EINTR)) {
+			continue;
+		} else {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Connect a stream socket to the UNIX-domain socket at `path`.
+ *
+ * Returns the connected descriptor, or -1 with errno describing the failure
+ * (ENAMETOOLONG when `path` does not fit into sockaddr_un.sun_path).
+ */
+static int open_domain_socket(const char *path)
+{
+	struct sockaddr_un addr;
+	int sock;
+	int saved_errno;
+
+	/* Reject paths that would overflow sun_path (room for the NUL too). */
+	if (strlen(path) >= sizeof(addr.sun_path)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	if ((sock = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) {
+		return -1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strcpy(addr.sun_path, path);	/* length checked above */
+
+	/* The address length covers the path actually passed in, not the
+	   compile-time XCS_SUN_PATH. */
+	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		/* Callers report errno via %m; keep connect()'s value. */
+		saved_errno = errno;
+		close(sock);
+		errno = saved_errno;
+		return -1;
+	}
+
+	return sock;
+}
+
+/*
+ * Signal handler (installed for SIGCHLD by daemonize()): reap every child
+ * that has already terminated.  WNOHANG makes waitpid() return instead of
+ * blocking once no finished child is left.
+ */
+static void child_exit(int sig)
+{
+	/* The handler may interrupt code that is about to inspect errno. */
+	int saved_errno = errno;
+
+	(void)sig;
+
+	while (waitpid(-1, NULL, WNOHANG) > 0) {
+		/* each successful call reaps one child */
+	}
+
+	errno = saved_errno;
+}
+
+/*
+ * Turn the calling process into a daemon.
+ *
+ * Forks (the parent exits), starts a new session, points fds 0-2 at
+ * /dev/null, sets umask 027, changes to "/", then creates and locks
+ * `pidfile` and writes the daemon's pid into it.  Finally installs the
+ * SIGCHLD reaper and ignores the terminal job-control signals.
+ *
+ * Returns immediately, doing none of the above, when the parent is already
+ * pid 1.  Exits with status 1 if /dev/null or the pid file cannot be set up
+ * (including when another process holds the pid-file lock).
+ */
+void daemonize(const char *pidfile)
+{
+	pid_t pid;
+	int fd;
+	int len;
+	int i;
+	char buf[100];
+
+	if (getppid() == 1) {
+		return;
+	}
+
+	if ((pid = fork()) > 0) {
+		exit(0);
+	} else if (pid == -1) {
+		err(errno, "fork() failed");
+	}
+
+	setsid();
+
+	/* redirect fd 0,1,2 to /dev/null */
+	if ((fd = open("/dev/null", O_RDWR)) == -1) {
+		exit(1);
+	}
+
+	for (i = 0; i <= 2; i++) {
+		/* dup2() closes the target itself.  Skip the slot open() may
+		   have landed in when a standard fd was already closed. */
+		if (fd != i) {
+			dup2(fd, i);
+		}
+	}
+
+	if (fd > 2) {
+		close(fd);
+	}
+
+	umask(027);
+	chdir("/");
+
+	/* O_CREAT requires an explicit mode; the umask above trims it. */
+	fd = open(pidfile, O_RDWR | O_CREAT, 0644);
+	if (fd == -1) {
+		exit(1);
+	}
+
+	/* fd is deliberately left open: closing it would drop the lock. */
+	if (lockf(fd, F_TLOCK, 0) == -1) {
+		exit(1);
+	}
+
+	/* Drop any longer, stale pid left behind by an earlier instance. */
+	len = snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
+	if (ftruncate(fd, 0) == -1 || write(fd, buf, len) != len) {
+		exit(1);
+	}
+
+	signal(SIGCHLD, child_exit);
+	signal(SIGTSTP, SIG_IGN);
+	signal(SIGTTOU, SIG_IGN);
+	signal(SIGTTIN, SIG_IGN);
+}
+
+/*
+ * Synchronous request/reply exchange, used only while setting up xcs; use
+ * asynchronous callbacks at any other time.
+ *
+ * Writes *msg to `fd` and then overwrites *msg with the reply read from the
+ * same descriptor.  Returns true when both transfers completed; otherwise
+ * logs the failing step and returns false.
+ */
+static bool xcs_send_recv(int fd, xcs_msg_t *msg)
+{
+	if (!write_sync(fd, msg, sizeof(*msg))) {
+		dolog(LOG_ERR, "Write failed at %s:%s():L%d? Possible bug.",
+		      __FILE__, __FUNCTION__, __LINE__);
+		return false;
+	}
+
+	if (!read_sync(fd, msg, sizeof(*msg))) {
+		dolog(LOG_ERR, "Read failed at %s:%s():L%d? Possible bug.",
+		      __FILE__, __FUNCTION__, __LINE__);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Open every connection the daemon needs and store the handles in the
+ * file-level globals: xenstore (xs), the hypervisor interface (xc) and two
+ * xcs sockets (xcs_ctrl_fd, xcs_data_fd).  The xcs sockets are then
+ * registered as control and data connections and a wildcard bind is
+ * requested on the control connection.
+ *
+ * Returns true on success.  On failure the reason is logged, everything
+ * opened so far is released again and false is returned.
+ */
+bool xen_setup(void)
+{
+	xcs_msg_t msg;
+
+	xs = xs_daemon_open();
+	if (xs == NULL) {
+		dolog(LOG_ERR,
+		      "Failed to contact xenstore (%m). Is it running?");
+		goto out;
+	}
+
+	xc = xc_interface_open();
+	if (xc == -1) {
+		dolog(LOG_ERR, "Failed to contact hypervisor (%m)");
+		goto out_close_store;
+	}
+
+	xcs_ctrl_fd = open_domain_socket(XCS_SUN_PATH);
+	if (xcs_ctrl_fd == -1) {
+		dolog(LOG_ERR, "Failed to contact xcs (%m). Is it running?");
+		goto out_close_xc;
+	}
+
+	xcs_data_fd = open_domain_socket(XCS_SUN_PATH);
+	if (xcs_data_fd == -1) {
+		dolog(LOG_ERR, "Failed to contact xcs (%m). Is it running?");
+		goto out_close_ctrl;
+	}
+
+	memset(&msg, 0, sizeof(msg));
+	msg.type = XCS_CONNECT_CTRL;
+	if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
+		dolog(LOG_ERR, "xcs control connect failed. Possible bug.");
+		goto out_close_data;
+	}
+
+	msg.type = XCS_CONNECT_DATA;
+	if (!xcs_send_recv(xcs_data_fd, &msg) || msg.result != XCS_RSLT_OK) {
+		dolog(LOG_ERR, "xcs data connect failed. Possible bug.");
+		goto out_close_data;
+	}
+
+	/* Since the vast majority of control messages are console messages
+	   it's just easier to ignore other messages that try to bind to
+	   a specific type. */
+	msg.type = XCS_MSG_BIND;
+	msg.u.bind.port = PORT_WILDCARD;
+	msg.u.bind.type = TYPE_WILDCARD;
+	if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
+		dolog(LOG_ERR, "xcs bind failed. Possible bug.");
+		goto out_close_data;
+	}
+
+	return true;
+
+	/* Unwind in reverse order of acquisition. */
+ out_close_data:
+	close(xcs_data_fd);
+	xcs_data_fd = -1;
+ out_close_ctrl:
+	close(xcs_ctrl_fd);
+	xcs_ctrl_fd = -1;
+ out_close_xc:
+	xc_interface_close(xc);
+	xc = -1;
+ out_close_store:
+	xs_daemon_close(xs);
+	xs = NULL;
+ out:
+	return false;
+}
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/utils.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/utils.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,47 @@
+/*\
+ * Copyright (C) International Business Machines Corp., 2005
+ * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ * Xen Console Daemon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+\*/
+
+#ifndef CONSOLED_UTILS_H
+#define CONSOLED_UTILS_H
+
+#include <stdbool.h>
+#include <syslog.h>
+#include <stdio.h>
+
+#include "xs.h"
+
+/*
+ * Detach the process from its parent and terminal and record its pid in
+ * `pidfile`.
+ */
+void daemonize(const char *pidfile);
+/*
+ * Open the xenstore, hypervisor and xcs connections and publish them through
+ * the globals declared below.  Returns false on failure.
+ */
+bool xen_setup(void);
+/*
+ * Blocking helpers built on _read_write_sync(): move exactly `size` bytes
+ * from (read_sync) or to (write_sync) `fd`.  Both evaluate to false when the
+ * transfer could not be completed.
+ */
+#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true)
+#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
+bool _read_write_sync(int fd, void *data, size_t size, bool do_read);
+
+/* xcs control and data sockets; -1 until xen_setup() has connected them. */
+extern int xcs_ctrl_fd;
+extern int xcs_data_fd;
+/* xenstore handle returned by xs_daemon_open(). */
+extern struct xs_handle *xs;
+/* Hypervisor interface handle returned by xc_interface_open(). */
+extern int xc;
+
+/*
+ * Logging front end.  The active branch forwards to syslog(); change the
+ * condition to 0 to print on stderr (with a newline appended) instead.
+ */
+#if 1
+#define dolog(val, fmt, ...) syslog(val, fmt, ## __VA_ARGS__)
+#else
+#define dolog(val, fmt, ...) fprintf(stderr, fmt "\n", ## __VA_ARGS__)
+#endif
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/Makefile
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/Makefile Thu Aug 25 22:53:20 2005
@@ -0,0 +1,11 @@
+# Builds the console stress-test programs described in the README.
+CFLAGS=-g -Wall
+CC=gcc
+# Link statically.  Presumably so that console-domU can be dropped into a
+# minimal guest image without shared libraries -- confirm.
+LDFLAGS=-static
+
+# all and clean name actions, not files.
+.PHONY: all clean
+
+all: console-dom0 console-domU procpipe
+
+# Each program is linked from its single object by make's built-in rules.
+console-dom0: console-dom0.o
+console-domU: console-domU.o
+procpipe: procpipe.o
+
+clean:; $(RM) *.o console-domU console-dom0 procpipe
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/README
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/README Thu Aug 25 22:53:20 2005
@@ -0,0 +1,29 @@
+ABOUT
+
+This tool uses two programs, one that lives in dom0 and one that lives in domU
+to verify that no data is lost. dom0 and domU share a handshake with each
+other that they use to exchange a random seed.
+
+Both programs then generate a series of random numbers, and write and read
+the numbers via the console. Because each side starts with the same seed
+they know what data the other side is generating and therefore what should be
+expected.
+
+RUNNING
+
+console-domU should be installed within the guest image. It must be launched
+from the client automatically. I use a custom initrd image and put it in the
+/linuxrc.
+
+console-dom0 and console-domU will communicate with each other and stress the
+console code. You can verify it at various levels by invoking it in different
+ways. procpipe is used to connect the two. I use the following command for
+testing:
+
+./procpipe ./console-dom0 'xm create -c /etc/xen/xmexample1'
+
+xmexample1 has no devices and no root set (this is what triggers /linuxrc).
+
+If it freezes, it probably means that console-domU is expecting more data from
+console-dom0 (which means that some data got dropped). I'd like to add
+timeouts in the future to handle this more gracefully.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/console-dom0.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/console-dom0.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,117 @@
+/* Written by Anthony Liguori <aliguori@xxxxxxxxxx> */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+#include <termios.h>
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+/*
+ * Fill the first `size` bytes of `buffer` with the low eight bits of
+ * successive random() values.
+ */
+static void generate_random_buffer(char *buffer, size_t size)
+{
+	char *cursor = buffer;
+	char *end = buffer + size;
+
+	while (cursor != end) {
+		*cursor++ = random() & 0xFF;
+	}
+}
+
+/*
+ * Remove every carriage return ('\r') from the NUL-terminated string in
+ * `buffer`, compacting it in place.
+ */
+static void canonicalize(char *buffer)
+{
+	const char *in;
+	char *out = buffer;
+
+	for (in = buffer; *in != '\0'; in++) {
+		if (*in != '\r') {
+			*out++ = *in;
+		}
+	}
+
+	*out = '\0';
+}
+
+/*
+ * dom0 side of the console stress test.
+ *
+ * Switches stdin and stdout to raw mode, echoes incoming lines to stderr
+ * until the peer sends the start marker, sends a seed taken from the current
+ * time and waits for the acknowledgement.  It then writes a random number of
+ * pseudo-random buffers to stdout, waiting for an "Okay." line from the peer
+ * after each one.
+ *
+ * Returns 0 after all runs, 1 when the peer closes its end or acknowledges
+ * the seed incorrectly.
+ */
+int main(int argc, char **argv)
+{
+	char buffer[4096];
+	char *line;
+	unsigned int seed;
+	size_t size;
+	int runs;
+	unsigned long long total_bytes = 0;
+	struct termios term;
+
+	tcgetattr(STDIN_FILENO, &term);
+	cfmakeraw(&term);
+	tcsetattr(STDIN_FILENO, TCSAFLUSH, &term);
+
+	tcgetattr(STDOUT_FILENO, &term);
+	cfmakeraw(&term);
+	tcsetattr(STDOUT_FILENO, TCSAFLUSH, &term);
+
+	/* Everything before the start marker is passed through to stderr. */
+	while ((line = fgets(buffer, sizeof(buffer), stdin))) {
+		canonicalize(line);
+
+		if (strcmp(line, "!!!XEN Test Begin!!!\n") == 0) {
+			break;
+		} else {
+			fprintf(stderr, "%s", line);
+		}
+	}
+
+	if (line == NULL) {
+		fprintf(stderr, "Client never sent start string.\n");
+		return 1;
+	}
+
+	seed = time(0);
+
+	printf("%u\n", seed); fflush(stdout);
+
+	fprintf(stderr, "Waiting for seed acknowledgement\n");
+	line = fgets(buffer, sizeof(buffer), stdin);
+	if (line == NULL) {
+		fprintf(stderr, "Client never acknowledge seed.\n");
+		return 1;
+	}
+
+	canonicalize(line);
+	if (strcmp(line, "Seed Okay.\n") != 0) {
+		fprintf(stderr, "Incorrect seed acknowledgement.\n");
+		fprintf(stderr, "[%s]", line);
+		return 1;
+	} else {
+		fprintf(stderr, "Processed seed.\n");
+	}
+
+	srandom(seed);
+
+	for (runs = (random() % 100000) + 4096; runs > 0; runs--) {
+
+		size = random() % 4096;
+
+		/* size is a size_t: %d would be undefined where it is wider
+		   than int. */
+		fprintf(stderr, "Writing %zu bytes.\n", size);
+
+		generate_random_buffer(buffer, size);
+		fwrite(buffer, size, 1, stdout);
+		fflush(stdout);
+
+		/* Relay the peer's chatter until it confirms the buffer. */
+		do {
+			line = fgets(buffer, sizeof(buffer), stdin);
+			if (line == NULL) {
+				fprintf(stderr, "Premature EOF from client.\n");
+				return 1;
+			}
+
+			canonicalize(line);
+			fprintf(stderr, "%s", line);
+		} while (strcmp(line, "Okay.\n") != 0);
+
+		total_bytes += size;
+	}
+
+	fprintf(stderr, "PASS: processed %llu byte(s).\n", total_bytes);
+
+	return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/console-domU.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/console-domU.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,76 @@
+/* Written by Anthony Liguori <aliguori@xxxxxxxxxx> */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <termios.h>
+#include <unistd.h>
+
+/*
+ * Drop all '\r' characters from the NUL-terminated string in `buffer`; the
+ * remaining characters are shifted down in place.
+ */
+static void canonicalize(char *buffer)
+{
+	size_t src;
+	size_t dst = 0;
+
+	for (src = 0; buffer[src] != '\0'; src++) {
+		if (buffer[src] == '\r') {
+			continue;
+		}
+		buffer[dst] = buffer[src];
+		dst++;
+	}
+
+	buffer[dst] = '\0';
+}
+
+/*
+ * domU side of the console stress test.
+ *
+ * Switches stdin and stdout to raw mode, announces itself with the start
+ * marker, reads the seed sent by the peer and acknowledges it.  It then
+ * regenerates the same pseudo-random sequence and compares it byte by byte
+ * with what arrives on stdin, reporting progress and mismatches on stdout
+ * and printing "Okay." after every buffer.
+ *
+ * Returns 0 after all runs, 1 if the seed never arrives or stdin ends early.
+ */
+int main(int argc, char **argv)
+{
+	char buffer[4096];
+	char *line;
+	unsigned int seed;
+	size_t size;
+	size_t i;
+	int runs;
+	struct termios term;
+
+	tcgetattr(STDIN_FILENO, &term);
+	cfmakeraw(&term);
+	tcsetattr(STDIN_FILENO, TCSAFLUSH, &term);
+
+	tcgetattr(STDOUT_FILENO, &term);
+	cfmakeraw(&term);
+	tcsetattr(STDOUT_FILENO, TCSAFLUSH, &term);
+
+	printf("!!!XEN Test Begin!!!\n"); fflush(stdout);
+	line = fgets(buffer, sizeof(buffer), stdin);
+	if (line == NULL) {
+		printf("Failure\n"); fflush(stdout);
+		return 1;
+	}
+
+	canonicalize(line);
+	seed = strtoul(line, 0, 0);
+
+	printf("Seed Okay.\n"); fflush(stdout);
+
+	srandom(seed);
+
+	for (runs = (random() % 100000) + 4096; runs > 0; runs--) {
+		size = random() % 4096;
+
+		for (i = 0; i < size; i++) {
+			int ch;
+			int exp;
+
+			ch = fgetc(stdin);
+			if (ch == EOF) {
+				/* No more data will arrive; looping on would
+				   only report one mismatch per byte. */
+				printf("Premature EOF from server.\n");
+				fflush(stdout);
+				return 1;
+			}
+
+			exp = random() & 0xFF;
+			if (ch != exp) {
+				printf("Expected %d got %d\n",
+				       exp, ch);
+				fflush(stdout);
+			}
+			/* i and size are size_t, hence %zu rather than %d. */
+			printf("Got %zu/%zu good bytes\n", i, size);
+		}
+
+		printf("Okay.\n"); fflush(stdout);
+	}
+
+	return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/procpipe.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/procpipe.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,133 @@
+/* Written by Anthony Liguori <aliguori@xxxxxxxxxx> */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <err.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define PACKAGE_NAME "procpipe"
+#define PACKAGE_VERSION "0.0.1"
+
+#define GPL_SHORT \
+"This is free software; see the source for copying conditions. There is NO\n"\
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+#define PACKAGE_BUGS "aliguori@xxxxxxxxxx"
+#define PACKAGE_AUTHOR "Anthony Liguori"
+#define PACKAGE_OWNER "IBM, Corp."
+#define PACKAGE_LICENSE GPL_SHORT
+
+/* Print the command-line help for the program invoked as `name` on stdout. */
+static void usage(const char *name)
+{
+	printf("Usage: %s [OPTIONS]\n", name);
+	fputs("\n"
+	      " -h, --help display this help and exit\n"
+	      " -V, --version output version information and exit\n"
+	      "\n",
+	      stdout);
+	printf("Report bugs to <%s>.\n", PACKAGE_BUGS);
+}
+
+/*
+ * Print the package name and version, author, copyright holder and license
+ * notice on stdout.  `name` is the name the program was invoked as.
+ */
+static void version(const char *name)
+{
+	printf("%s (%s) %s\n", name, PACKAGE_NAME, PACKAGE_VERSION);
+	printf("Written by %s.\n", PACKAGE_AUTHOR);
+	printf("\n");
+	printf("Copyright (C) 2005 %s.\n", PACKAGE_OWNER);
+	printf("%s\n", PACKAGE_LICENSE);
+}
+
+/*
+ * Fork a child that runs `cmd` through "/bin/sh -c" with descriptor `stdout`
+ * as its standard output and `stdin` as its standard input.
+ *
+ * Returns the child's pid in the parent, or -1 if fork() failed.  The child
+ * never returns from this function: if the shell cannot be executed it exits
+ * with status 127.
+ */
+static pid_t exec(int stdout, int stdin, const char *cmd)
+{
+	pid_t pid;
+
+	pid = fork();
+	if (pid == 0) {
+		long fd;
+		long max_fd;
+
+		/* dup2() closes the target descriptor itself. */
+		dup2(stdout, STDOUT_FILENO);
+		dup2(stdin, STDIN_FILENO);
+
+		/* Close every other inherited descriptor.  An inherited copy
+		   of a pipe's write end would keep its reader from ever
+		   seeing end of file. */
+		max_fd = sysconf(_SC_OPEN_MAX);
+		if (max_fd < 0) {
+			max_fd = 1024;
+		}
+		for (fd = STDERR_FILENO + 1; fd < max_fd; fd++) {
+			close((int)fd);
+		}
+
+		execlp("/bin/sh", "sh", "-c", cmd, (char *)NULL);
+
+		/* Only reached when exec failed.  Leave without running any
+		   of the parent's remaining code or flushing its buffers. */
+		_exit(127);
+	}
+
+	return pid;
+}
+
+/*
+ * procpipe: run two shell commands with their standard streams
+ * cross-connected, so that each command reads what the other one writes.
+ *
+ * Usage: procpipe [-h|-V] COMMAND1 COMMAND2
+ *
+ * Waits for both commands and reports any non-zero status on stdout.
+ * Returns 0 when both commands succeeded and 1 when either failed.  Exits
+ * through err()/errx() on usage or setup errors.
+ */
+int main(int argc, char **argv)
+{
+	int ch, opt_ind = 0;
+	const char *sopt = "hV";
+	struct option lopt[] = {
+		{ "help", 0, 0, 'h' },
+		{ "version", 0, 0, 'V' },
+		{ 0 }
+	};
+	int host_stdout[2];
+	int host_stdin[2];
+	int res;
+	pid_t pid1, pid2;
+	int status;
+	int failed = 0;
+
+	while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
+		switch (ch) {
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		case 'V':
+			version(argv[0]);
+			exit(0);
+		case '?':
+			errx(EINVAL, "Try `%s --help' for more information.",
+			     argv[0]);
+		}
+	}
+
+	if ((argc - optind) != 2) {
+		errx(EINVAL, "Two commands are required.\n"
+		     "Try `%s --help' for more information.", argv[0]);
+	}
+
+	res = pipe(host_stdout);
+	if (res == -1) {
+		err(errno, "pipe() failed");
+	}
+
+	res = pipe(host_stdin);
+	if (res == -1) {
+		err(errno, "pipe() failed");
+	}
+
+	/* First command: writes into host_stdout, reads from host_stdin. */
+	pid1 = exec(host_stdout[1], host_stdin[0], argv[optind]);
+	if (pid1 == -1) {
+		err(errno, "exec(%s)", argv[optind]);
+	}
+
+	/* Second command: the mirror image of the first. */
+	pid2 = exec(host_stdin[1], host_stdout[0], argv[optind + 1]);
+	if (pid2 == -1) {
+		err(errno, "exec(%s)", argv[optind + 1]);
+	}
+
+	/* The parent takes no part in the traffic.  While it holds the write
+	   ends open, a command can never see end of file when its peer
+	   exits. */
+	close(host_stdout[0]);
+	close(host_stdout[1]);
+	close(host_stdin[0]);
+	close(host_stdin[1]);
+
+	waitpid(pid1, &status, 0);
+	if (WIFEXITED(status)) status = WEXITSTATUS(status);
+
+	if (status != 0) {
+		printf("Child exited with status %d\n", status);
+		failed = 1;
+	}
+
+	waitpid(pid2, &status, 0);
+	if (WIFEXITED(status)) status = WEXITSTATUS(status);
+
+	if (status != 0) {
+		printf("Child2 exited with status %d\n", status);
+		failed = 1;
+	}
+
+	/* Let callers (e.g. a test script) notice a failed command. */
+	return failed;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/backend.hotplug
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/examples/backend.hotplug Thu Aug 25 22:53:20 2005
@@ -0,0 +1,21 @@
+#! /bin/sh
+
+# Hotplug agent for Xen backend devices.  Reads DEVPATH and ACTION from the
+# environment, for example:
+#DEVPATH=/devices/xen-backend/vif-1-0
+#ACTION=add
+
+PATH=/etc/xen/scripts:$PATH
+
+DEV=$(basename "$DEVPATH")
+case "$ACTION" in
+    add)
+        case "$DEV" in
+            vif-*)
+                # vif-<a>-<b> becomes vif<a>.<b>, e.g. vif-1-0 -> vif1.0
+                vif=$(echo "$DEV" | sed 's/-\([0-9]*\)-\([0-9]*\)/\1.\2/')
+                vif-bridge up domain=unknown vif="$vif" mac=fe:ff:ff:ff:ff:ff bridge=xen-br0 >/dev/null 2>&1
+                ;;
+        esac
+        ;;
+    remove)
+        # nothing to undo on removal
+        ;;
+esac
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/network-bridge
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/examples/network-bridge Thu Aug 25 22:53:20 2005
@@ -0,0 +1,261 @@
+#!/bin/sh -x
+#============================================================================
+# Default Xen network start/stop script.
+# Xend calls a network script when it starts.
+# The script name to use is defined in /etc/xen/xend-config.sxp
+# in the network-script field.
+#
+# This script creates a bridge (default xen-br0), adds a device
+# (default eth0) to it, copies the IP addresses from the device
+# to the bridge and adjusts the routes accordingly.
+#
+# If all goes well, this should ensure that networking stays up.
+# However, some configurations are upset by this, especially
+# NFS roots. If the bridged setup does not meet your needs,
+# configure a different script, for example using routing instead.
+#
+# Usage:
+#
+# network (start|stop|status) {VAR=VAL}*
+#
+# Vars:
+#
+# bridge The bridge to use (default xen-br0).
+# netdev The interface to add to the bridge (default eth0).
+# antispoof Whether to use iptables to prevent spoofing (default no).
+#
+# start:
+# Creates the bridge and enslaves netdev to it.
+# Copies the IP addresses from netdev to the bridge.
+# Deletes the routes to netdev and adds them on bridge.
+#
+# stop:
+# Removes netdev from the bridge.
+# Deletes the routes to bridge and adds them to netdev.
+#
+# status:
+# Print ifconfig for netdev and bridge.
+# Print routes.
+#
+#============================================================================
+
+# Exit if anything goes wrong.
+set -e
+
+# First arg is the operation.
+OP=$1
+shift
+
+# Pull variables in args in to environment.
+# Every remaining argument is exported as given, so it is expected to have
+# the form VAR=VAL.
+for arg ; do export "${arg}" ; done
+
+# Fall back to the defaults for anything that is still unset or empty.
+bridge=${bridge:-xen-br0}
+netdev=${netdev:-eth0}
+antispoof=${antispoof:-no}
+
+# Report the effective settings on stderr.
+echo "*network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof" >&2
+
+# Usage: transfer_addrs src dst
+# Copy all IP addresses (including aliases) from device $src to device $dst.
+# Does nothing when $dst already has an 'inet' address.  After copying, every
+# route that 'ip route list' shows for $dst is deleted again.
+transfer_addrs () {
+ local src=$1
+ local dst=$2
+ # Don't bother if $dst already has IP addresses.
+ if ip addr show dev ${dst} | egrep -q '^ *inet ' ; then
+ return
+ fi
+ # Address lines start with 'inet' and have the device in them.
+ # Replace 'inet' with 'ip addr add' and change the device name $src
+ # to 'dev $dst', then run the resulting commands with 'sh -e'.
+ # NOTE(review): the second substitution replaces the matched
+ # address/prefix with itself, so it changes nothing.
+ ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
+s/inet/ip addr add/
+s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+/[0-9]\+\)@\1@
+s/${src}/dev ${dst}/
+" | sh -e
+ # Remove automatic routes on destination device
+ ip route list | sed -ne "
+/dev ${dst}\( \|$\)/ {
+ s/^/ip route del /
+ p
+}" | sh -e
+}
+
+# Usage: del_addrs src
+# Delete every 'inet' address listed for device $src.
+# Each address line is rewritten into an 'ip addr del' command: the
+# '/<prefix length>' suffix is stripped from the address and the device name
+# is turned into 'dev $src'.  The commands are then run with 'sh -e'.
+del_addrs () {
+ local src=$1
+ ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
+s/inet/ip addr del/
+s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\)/[0-9]\+@\1@
+s/${src}/dev ${src}/
+" | sh -e
+}
+
+# Usage: transfer_routes src dst
+# Get all IP routes to device $src, delete them, and
+# add the same routes to device $dst.
+# The original routes have to be deleted, otherwise adding them
+# for $dst fails (duplicate routes).
+transfer_routes () {
+ local src=$1
+ local dst=$2
+ # List all routes and grep the ones with $src in.
+ # Stick 'ip route del' on the front to delete.
+ # Change $src to $dst and use 'ip route add' to add.
+ # In the sed script, 'h' saves the route line, the first 'P' prints
+ # the delete command, 'g' restores the saved line, the second 'P'
+ # prints the add command and 'd' discards the line.  The generated
+ # commands are run with 'sh -e'.
+ ip route list | sed -ne "
+/dev ${src}\( \|$\)/ {
+ h
+ s/^/ip route del /
+ P
+ g
+ s/${src}/${dst}/
+ s/^/ip route add /
+ P
+ d
+}" | sh -e
+}
+
+# Usage: create_bridge bridge
+# Create the bridge, with spanning tree off and a forwarding delay of 0,
+# unless 'brctl show' already mentions it, then bring the interface up.
+create_bridge () {
+ local bridge=$1
+
+ # Don't create the bridge if it already exists.
+ # NOTE(review): grep matches the name anywhere in the 'brctl show'
+ # output, so a longer name containing ${bridge} also counts as existing.
+ if ! brctl show | grep -q ${bridge} ; then
+ brctl addbr ${bridge}
+ brctl stp ${bridge} off
+ brctl setfd ${bridge} 0
+ fi
+ ifconfig ${bridge} up
+}
+
+# Usage: add_to_bridge bridge dev
+# Add interface $dev to $bridge unless 'brctl show' already lists $dev.
+add_to_bridge () {
+ local bridge=$1
+ local dev=$2
+ # Don't add $dev to $bridge if it's already on a bridge.
+ # NOTE(review): this is a substring match on the whole 'brctl show'
+ # output and does not check which bridge $dev is on.
+ if ! brctl show | grep -q ${dev} ; then
+ brctl addif ${bridge} ${dev}
+ fi
+}
+
+# Usage: antispoofing dev bridge
+# Set the default policy of the iptables FORWARD chain to DROP, then append
+# a rule accepting packets whose incoming physical device is $dev.
+# NOTE(review): $bridge is accepted but not used by either rule.
+antispoofing () {
+ local dev=$1
+ local bridge=$2
+
+ iptables -P FORWARD DROP
+ iptables -A FORWARD -m physdev --physdev-in ${dev} -j ACCEPT
+}
+
+# Usage: show_status dev bridge
+# Print ifconfig and routes.
+# The output is the ifconfig report for $dev and $bridge, followed by the
+# routing table as shown by 'ip route list' and by 'route -n', framed by two
+# separator lines.
+show_status () {
+ local dev=$1
+ local bridge=$2
+
+ echo '============================================================'
+ ifconfig ${dev}
+ ifconfig ${bridge}
+ echo ' '
+ ip route list
+ echo ' '
+ route -n
+ echo '============================================================'
+}
+
+# Usage: op_start
+# Set up bridged networking from the global ${bridge}, ${netdev} and
+# ${antispoof} settings.
+#   - bridge=null: do nothing.
+#   - veth0 already listed by plain 'ifconfig': only the bridge is created.
+#   - a veth0 device exists: ${netdev} is renamed to p${netdev}, veth0 takes
+#     over the name and MAC address of ${netdev}, and both vif0.0 and
+#     p${netdev} are added to the bridge.
+#   - otherwise (old style): addresses and routes are moved from ${netdev}
+#     to the bridge.
+op_start () {
+    # '=' is the portable string comparison for [ ]; '==' is not accepted
+    # by every /bin/sh.
+    if [ "${bridge}" = "null" ] ; then
+        return
+    fi
+
+    create_bridge ${bridge}
+
+    if ifconfig 2>/dev/null | grep -q veth0 ; then
+        return
+    fi
+
+    if ifconfig veth0 2>/dev/null | grep -q veth0 ; then
+        mac=`ifconfig ${netdev} | grep HWadd | sed -e 's/.*\(..:..:..:..:..:..\).*/\1/'`
+        if ! ifdown ${netdev} ; then
+            # if ifdown didn't work, see if we have an ip= on cmd line
+            if egrep 'ip=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:' /proc/cmdline ;
+            then
+                kip=`sed -e 's!.*ip=\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\):.*!\1!' /proc/cmdline`
+                kmask=`sed -e 's!.*ip=[^:]*:[^:]*:[^:]*:\([^:]*\):.*!\1!' /proc/cmdline`
+                kgate=`sed -e 's!.*ip=[^:]*:[^:]*:\([^:]*\):.*!\1!' /proc/cmdline`
+                ifconfig ${netdev} 0.0.0.0 down
+            fi
+        fi
+        ip link set ${netdev} name p${netdev}
+        ip link set veth0 name ${netdev}
+        ifconfig p${netdev} 0.0.0.0 -arp down
+        ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff
+        ifconfig ${netdev} hw ether ${mac}
+        add_to_bridge ${bridge} vif0.0
+        add_to_bridge ${bridge} p${netdev}
+        ip link set ${bridge} up
+        ip link set vif0.0 up
+        ip link set p${netdev} up
+        if ! ifup ${netdev} ; then
+            if [ -n "${kip}" ] ; then
+                # use the addresses we grokked from /proc/cmdline
+                ifconfig ${netdev} ${kip}
+                [ -n "${kmask}" ] && ifconfig ${netdev} netmask ${kmask}
+                ifconfig ${netdev} up
+                [ -n "${kgate}" ] && ip route add default via ${kgate}
+            fi
+        fi
+    else
+        # old style without veth0
+        transfer_addrs ${netdev} ${bridge}
+        transfer_routes ${netdev} ${bridge}
+    fi
+
+    if [ "${antispoof}" = 'yes' ] ; then
+        antispoofing ${netdev} ${bridge}
+    fi
+}
+
+# Usage: op_stop
+# Undo op_start: remove ${netdev} from ${bridge}.  When a veth0 device
+# exists, vif0.0 is also removed from the bridge and veth0's MAC address, IP
+# addresses and routes are moved to ${netdev}.  Otherwise only the routes
+# are moved from ${bridge} back to ${netdev}.  bridge=null does nothing.
+op_stop () {
+    # '=' is the portable string comparison for [ ]; '==' is not accepted
+    # by every /bin/sh.
+    if [ "${bridge}" = "null" ] ; then
+        return
+    fi
+
+    brctl delif ${bridge} ${netdev}
+
+    if ifconfig veth0 2>/dev/null | grep -q veth0 ; then
+        brctl delif ${bridge} vif0.0
+        ifconfig vif0.0 down
+        mac=`ifconfig veth0 | grep HWadd | sed -e 's/.*\(..:..:..:..:..:..\).*/\1/'`
+        ifconfig ${netdev} down
+        ifconfig ${netdev} hw ether ${mac}
+        ifconfig ${netdev} arp up
+        transfer_addrs veth0 ${netdev}
+        transfer_routes veth0 ${netdev}
+        del_addrs veth0
+        ifconfig veth0 -arp down
+        ifconfig veth0 hw ether 00:00:00:00:00:00
+    else
+        transfer_routes ${bridge} ${netdev}
+    fi
+}
+
+# Dispatch on the operation given as the first argument.  Anything other
+# than start, stop or status is reported on stderr and exits with status 1.
+case ${OP} in
+ start)
+ op_start
+ ;;
+
+ stop)
+ op_stop
+ ;;
+
+ status)
+ show_status ${netdev} ${bridge}
+ ;;
+
+ *)
+ echo 'Unknown command: ' ${OP} >&2
+ echo 'Valid commands are: start, stop, status' >&2
+ exit 1
+esac
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xenctrl.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xenctrl.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,526 @@
+/******************************************************************************
+ * xenctrl.h
+ *
+ * A library for low-level access to the Xen control interfaces.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef XENCTRL_H
+#define XENCTRL_H
+
+#include <stdint.h>
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+
+#include <sys/ptrace.h>
+#include <xen/xen.h>
+#include <xen/dom0_ops.h>
+#include <xen/event_channel.h>
+#include <xen/sched_ctl.h>
+#include <xen/acm.h>
+
+#ifdef __ia64__
+#define XC_PAGE_SHIFT 14
+#else
+#define XC_PAGE_SHIFT 12
+#endif
+#define XC_PAGE_SIZE (1UL << XC_PAGE_SHIFT)
+#define XC_PAGE_MASK (~(XC_PAGE_SIZE-1))
+
+/*
+ * DEFINITIONS FOR CPU BARRIERS
+ */
+
+/*
+ * mb(), rmb() and wmb() are selected per architecture.  Every non-empty
+ * definition carries a "memory" clobber.
+ */
+#if defined(__i386__)
+/* i386: mb/rmb use a locked add of 0 to the word at the stack pointer. */
+#define mb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
+#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
+/* wmb emits no instruction; only the "memory" clobber remains. */
+#define wmb() __asm__ __volatile__ ( "" : : : "memory")
+#elif defined(__x86_64__)
+/* x86_64: mb uses mfence, rmb uses lfence, wmb emits no instruction. */
+#define mb() __asm__ __volatile__ ( "mfence" : : : "memory")
+#define rmb() __asm__ __volatile__ ( "lfence" : : : "memory")
+#define wmb() __asm__ __volatile__ ( "" : : : "memory")
+#elif defined(__ia64__)
+/* FIXME */
+/* ia64: all three macros currently expand to nothing at all. */
+#define mb()
+#define rmb()
+#define wmb()
+#else
+/* Any other architecture fails the build until barriers are defined. */
+#error "Define barriers"
+#endif
+
+/*
+ * INITIALIZATION FUNCTIONS
+ */
+
+/**
+ * This function opens a handle to the hypervisor interface. This function can
+ * be called multiple times within a single process. Multiple processes can
+ * have an open hypervisor interface at the same time.
+ *
+ * Each call to this function should have a corresponding call to
+ * xc_interface_close().
+ *
+ * This function can fail if the caller does not have superuser permission or
+ * if a Xen-enabled kernel is not currently running.
+ *
+ * @return a handle to the hypervisor interface or -1 on failure
+ */
+int xc_interface_open(void);
+
+/**
+ * This function closes an open hypervisor interface.
+ *
+ * This function can fail if the handle does not represent an open interface or
+ * if there were problems closing the interface.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @return 0 on success, -1 otherwise.
+ */
+int xc_interface_close(int xc_handle);
+
+/*
+ * DOMAIN DEBUGGING FUNCTIONS
+ */
+
+typedef struct xc_core_header {
+ unsigned int xch_magic;
+ unsigned int xch_nr_vcpus;
+ unsigned int xch_nr_pages;
+ unsigned int xch_ctxt_offset;
+ unsigned int xch_index_offset;
+ unsigned int xch_pages_offset;
+} xc_core_header_t;
+
+
+long xc_ptrace(enum __ptrace_request request,
+ u32 domid,
+ long addr,
+ long data);
+
+long xc_ptrace_core(enum __ptrace_request request,
+ u32 domid,
+ long addr,
+ long data);
+
+int xc_waitdomain(int domain,
+ int *status,
+ int options);
+
+int xc_waitdomain_core(int domain,
+ int *status,
+ int options);
+
+/*
+ * DOMAIN MANAGEMENT FUNCTIONS
+ */
+
+typedef struct {
+ u32 domid;
+ u32 ssidref;
+ unsigned int dying:1, crashed:1, shutdown:1,
+ paused:1, blocked:1, running:1;
+ unsigned int shutdown_reason; /* only meaningful if shutdown==1 */
+ unsigned long nr_pages;
+ unsigned long shared_info_frame;
+ u64 cpu_time;
+ unsigned long max_memkb;
+ unsigned int vcpus;
+ s32 vcpu_to_cpu[MAX_VIRT_CPUS];
+ cpumap_t cpumap[MAX_VIRT_CPUS];
+} xc_dominfo_t;
+
+typedef dom0_getdomaininfo_t xc_domaininfo_t;
+int xc_domain_create(int xc_handle,
+ u32 ssidref,
+ u32 *pdomid);
+
+
+int xc_domain_dumpcore(int xc_handle,
+ u32 domid,
+ const char *corename);
+
+
+/**
+ * This function pauses a domain. A paused domain still exists in memory
+ * however it does not receive any timeslices from the hypervisor.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain id to pause
+ * @return 0 on success, -1 on failure.
+ */
+int xc_domain_pause(int xc_handle,
+ u32 domid);
+/**
+ * This function unpauses a domain. The domain should have been previously
+ * paused.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain id to unpause
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_unpause(int xc_handle,
+ u32 domid);
+
+/**
+ * This function will destroy a domain. Destroying a domain removes the domain
+ * completely from memory. This function should be called after sending the
+ * domain a SHUTDOWN control message to free up the domain resources.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain id to destroy
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_destroy(int xc_handle,
+ u32 domid);
+int xc_domain_pincpu(int xc_handle,
+ u32 domid,
+ int vcpu,
+ cpumap_t *cpumap);
+/**
+ * This function will return information about one or more domains. It is
+ * designed to iterate over the list of domains. If a single domain is
+ * requested, this function will return the next domain in the list - if
+ * one exists. It is, therefore, important in this case to make sure the
+ * domain requested was the one returned.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm first_domid the first domain to enumerate information from. Domains
+ * are currently enumerated in order of creation.
+ * @parm max_doms the number of elements in info
+ * @parm info an array of max_doms size that will contain the information for
+ * the enumerated domains.
+ * @return the number of domains enumerated or -1 on error
+ */
+int xc_domain_getinfo(int xc_handle,
+ u32 first_domid,
+ unsigned int max_doms,
+ xc_dominfo_t *info);
+
+/**
+ * This function will return information about one or more domains, using a
+ * single hypercall. The domain information will be stored into the supplied
+ * array of xc_domaininfo_t structures.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm first_domain the first domain to enumerate information from.
+ * Domains are currently enumerated in order of creation.
+ * @parm max_domains the number of elements in info
+ * @parm info an array of max_domains size that will contain the information
+ * for the enumerated domains.
+ * @return the number of domains enumerated or -1 on error
+ */
+int xc_domain_getinfolist(int xc_handle,
+ u32 first_domain,
+ unsigned int max_domains,
+ xc_domaininfo_t *info);
+
+/**
+ * This function returns the execution context of one virtual CPU of a
+ * domain. This information is more detailed than the information from
+ * xc_domain_getinfo().
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm vcpu the virtual CPU whose execution context is requested
+ * @parm ctxt a pointer to a structure to store the execution context of the
+ * virtual CPU
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_get_vcpu_context(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ vcpu_guest_context_t *ctxt);
+
+int xc_domain_setcpuweight(int xc_handle,
+ u32 domid,
+ float weight);
+long long xc_domain_get_cpu_usage(int xc_handle,
+ domid_t domid,
+ int vcpu);
+
+
+typedef dom0_shadow_control_stats_t xc_shadow_control_stats_t;
+int xc_shadow_control(int xc_handle,
+ u32 domid,
+ unsigned int sop,
+ unsigned long *dirty_bitmap,
+ unsigned long pages,
+ xc_shadow_control_stats_t *stats);
+
+int xc_bvtsched_global_set(int xc_handle,
+ unsigned long ctx_allow);
+
+int xc_bvtsched_domain_set(int xc_handle,
+ u32 domid,
+ u32 mcuadv,
+ int warpback,
+ s32 warpvalue,
+ long long warpl,
+ long long warpu);
+
+int xc_bvtsched_global_get(int xc_handle,
+ unsigned long *ctx_allow);
+
+int xc_bvtsched_domain_get(int xc_handle,
+ u32 domid,
+ u32 *mcuadv,
+ int *warpback,
+ s32 *warpvalue,
+ long long *warpl,
+ long long *warpu);
+
+/*
+ * Set the period, slice, latency, extratime and weight values for domain
+ * domid.  NOTE(review): presumably these are the sedf scheduler parameters;
+ * units and valid ranges are not visible in this header -- confirm.
+ */
+int xc_sedf_domain_set(int xc_handle,
+                       u32 domid,
+                       u64 period, u64 slice, u64 latency,
+                       u16 extratime, u16 weight);
+
+/*
+ * Counterpart of xc_sedf_domain_set(): the period, slice, latency,
+ * extratime and weight values for domain domid are passed by pointer.
+ * NOTE(review): presumably all five are output parameters -- confirm
+ * against the implementation.
+ */
+int xc_sedf_domain_get(int xc_handle,
+                       u32 domid,
+                       u64 *period, u64 *slice, u64 *latency,
+                       u16 *extratime, u16 *weight);
+
+typedef evtchn_status_t xc_evtchn_status_t;
+
+/*
+ * EVENT CHANNEL FUNCTIONS
+ */
+
+/**
+ * This function allocates an unbound port. Ports are named endpoints used for
+ * interdomain communication. This function is most useful in opening a
+ * well-known port within a domain to receive events on.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm dom the ID of the domain. This may be DOMID_SELF
+ * @parm port a pointer to a port. This is an in/out parameter. If *port is
+ * 0, then a new port will be assigned; if *port is > 0 then that
+ * port is allocated if the port is unallocated.
+ * @return 0 on success, -1 on failure
+ */
+int xc_evtchn_alloc_unbound(int xc_handle,
+ u32 dom,
+ int *port);
+
+/**
+ * This function creates a pair of ports between two domains. A port can only
+ * be bound once within a domain.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm dom1 one of the two domains to connect. Can be DOMID_SELF.
+ * @parm dom2 the other domain to connect. Can be DOMID_SELF.
+ * @parm port1 an in/out parameter. If *port1 > 0, then try to connect
+ * *port1. If 0, then allocate a new port and store the port in *port1.
+ * @parm port2 the port connected on dom2. This parameter behaves the same
+ * way as port1.
+ * @return 0 on success, -1 on error.
+ */
+int xc_evtchn_bind_interdomain(int xc_handle,
+ u32 dom1,
+ u32 dom2,
+ int *port1,
+ int *port2);
+int xc_evtchn_bind_virq(int xc_handle,
+ int virq,
+ int *port);
+
+/**
+ * This function will close a single port on an event channel.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm dom the domain that the port exists on. May be DOMID_SELF.
+ * @parm port the port to close
+ * @return 0 on success, -1 on error
+ */
+int xc_evtchn_close(int xc_handle,
+ u32 dom, /* may be DOMID_SELF */
+ int port);
+
+/**
+ * This function generates a notify event on a bound port.
+ *
+ * Notifies can be read within Linux by opening /dev/xen/evtchn and reading
+ * a 16 bit value. The result will be the port the event occurred on. When
+ * events occur, the port is masked until the 16 bit port value is written back
+ * to the file. When /dev/xen/evtchn is opened, it has to be bound via an
+ * ioctl to each port to listen on. The ioctl for binding is _IO('E', 2). The
+ * parameter is the port to listen on.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm local_port the port to generate the notify on
+ * @return 0 on success, -1 on error
+ */
+int xc_evtchn_send(int xc_handle,
+ int local_port);
+int xc_evtchn_status(int xc_handle,
+ u32 dom, /* may be DOMID_SELF */
+ int port,
+ xc_evtchn_status_t *status);
+
+int xc_physdev_pci_access_modify(int xc_handle,
+ u32 domid,
+ int bus,
+ int dev,
+ int func,
+ int enable);
+
+int xc_readconsolering(int xc_handle,
+ char **pbuffer,
+ unsigned int *pnr_chars,
+ int clear);
+
+typedef dom0_physinfo_t xc_physinfo_t;
+int xc_physinfo(int xc_handle,
+ xc_physinfo_t *info);
+
+int xc_sched_id(int xc_handle,
+ int *sched_id);
+
+int xc_domain_setmaxmem(int xc_handle,
+ u32 domid,
+ unsigned int max_memkb);
+
+int xc_domain_memory_increase_reservation(int xc_handle,
+ u32 domid,
+ unsigned int mem_kb);
+
+typedef dom0_perfc_desc_t xc_perfc_desc_t;
+/* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
+int xc_perfc_control(int xc_handle,
+ u32 op,
+ xc_perfc_desc_t *desc);
+
+/* read/write msr */
+long long xc_msr_read(int xc_handle, int cpu_mask, int msr);
+int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
+ unsigned int high);
+
+/**
+ * Memory maps a range within one domain to a local address range. Mappings
+ * should be unmapped with munmap and should follow the same rules as mmap
+ * regarding page alignment. Returns NULL on failure.
+ *
+ * In Linux, the ring queue for the control channel is accessible by mapping
+ * the shared_info_frame (from xc_domain_getinfo()) + 2048. The structure
+ * stored there is of type control_if_t.
+ *
+ * @parm xc_handle a handle on an open hypervisor interface
+ * @parm dom the domain to map memory from
+ * @parm size the amount of memory to map (in multiples of page size)
+ * @parm prot same flag as in mmap().
+ * @parm mfn the frame address to map.
+ */
+void *xc_map_foreign_range(int xc_handle, u32 dom,
+ int size, int prot,
+ unsigned long mfn );
+
+void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot,
+ unsigned long *arr, int num );
+
+int xc_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf,
+ unsigned long max_pfns);
+
+int xc_ia64_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf,
+ unsigned int start_page, unsigned int nr_pages);
+
+int xc_mmuext_op(int xc_handle, struct mmuext_op *op, unsigned int nr_ops,
+ domid_t dom);
+
+int xc_dom_mem_op(int xc_handle, unsigned int memop, unsigned int *extent_list,
+ unsigned int nr_extents, unsigned int extent_order,
+ domid_t domid);
+
+int xc_get_pfn_type_batch(int xc_handle, u32 dom, int num, unsigned long *arr);
+
+
+/*\
+ * GRANT TABLE FUNCTIONS
+\*/
+
+/**
+ * This function opens a handle to the more restricted grant table hypervisor
+ * interface. This may be used where the standard interface is not
+ * available because the domain is not privileged.
+ * This function can be called multiple times within a single process.
+ * Multiple processes can have an open hypervisor interface at the same time.
+ *
+ * Each call to this function should have a corresponding call to
+ * xc_grant_interface_close().
+ *
+ * This function can fail if a Xen-enabled kernel is not currently running.
+ *
+ * @return a handle to the hypervisor grant table interface or -1 on failure
+ */
+int xc_grant_interface_open(void);
+
+/**
+ * This function closes an open grant table hypervisor interface.
+ *
+ * This function can fail if the handle does not represent an open interface or
+ * if there were problems closing the interface.
+ *
+ * @parm xc_handle a handle to an open grant table hypervisor interface
+ * @return 0 on success, -1 otherwise.
+ */
+int xc_grant_interface_close(int xc_handle);
+
+int xc_gnttab_map_grant_ref(int xc_handle,
+ u64 host_virt_addr,
+ u32 dom,
+ u16 ref,
+ u16 flags,
+ s16 *handle,
+ u64 *dev_bus_addr);
+
+int xc_gnttab_unmap_grant_ref(int xc_handle,
+ u64 host_virt_addr,
+ u64 dev_bus_addr,
+ u16 handle,
+ s16 *status);
+
+int xc_gnttab_setup_table(int xc_handle,
+ u32 dom,
+ u16 nr_frames,
+ s16 *status,
+ unsigned long **frame_list);
+
+/* Grant debug builds only: */
+int xc_gnttab_dump_table(int xc_handle,
+ u32 dom,
+ s16 *status);
+
+/* Get current total pages allocated to a domain. */
+long xc_get_tot_pages(int xc_handle, u32 domid);
+
+/* Execute a privileged dom0 operation. */
+int xc_dom0_op(int xc_handle, dom0_op_t *op);
+
+/* Initializes the store (for dom0)
+ remote_port should be the remote end of a bound interdomain channel between
+ the store and dom0.
+
+ This function returns a shared frame that should be passed to
+ xs_introduce_domain
+ */
+long xc_init_store(int xc_handle, int remote_port);
+
+/*
+ * MMU updates.
+ */
+#define MAX_MMU_UPDATES 1024
+struct xc_mmu {
+ mmu_update_t updates[MAX_MMU_UPDATES];
+ int idx;
+ domid_t subject;
+};
+typedef struct xc_mmu xc_mmu_t;
+xc_mmu_t *xc_init_mmu_updates(int xc_handle, domid_t dom);
+int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu,
+ unsigned long ptr, unsigned long val);
+int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu);
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xenguest.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xenguest.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,66 @@
+/******************************************************************************
+ * xenguest.h
+ *
+ * A library for guest domain management in Xen.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef XENBUILD_H
+#define XENBUILD_H
+
+#define XCFLAGS_VERBOSE 1
+#define XCFLAGS_LIVE 2
+#define XCFLAGS_DEBUG 4
+#define XCFLAGS_CONFIGURE 8
+
+/**
+ * This function will save a domain running Linux.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm fd the file descriptor to save a domain to
+ * @parm dom the id of the domain
+ * @return 0 on success, -1 on failure
+ */
+int xc_linux_save(int xc_handle, int fd, uint32_t dom);
+
+/**
+ * This function will restore a saved domain running Linux.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm io_fd the file descriptor to restore a domain from
+ * @parm dom the id of the domain
+ * @parm nr_pfns the number of pages
+ * @parm store_evtchn the store event channel for this domain to use
+ * @parm store_mfn returned with the mfn of the store page
+ * @return 0 on success, -1 on failure
+ */
+int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
+                     unsigned long nr_pfns, unsigned int store_evtchn,
+                     unsigned long *store_mfn);
+
+int xc_linux_build(int xc_handle,
+ uint32_t domid,
+ const char *image_name,
+ const char *ramdisk_name,
+ const char *cmdline,
+ unsigned int control_evtchn,
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn);
+
+struct mem_map;
+int xc_vmx_build(int xc_handle,
+ uint32_t domid,
+ int memsize,
+ const char *image_name,
+ struct mem_map *memmap,
+ const char *ramdisk_name,
+ const char *cmdline,
+ unsigned int control_evtchn,
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn);
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xg_private.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xg_private.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,86 @@
+/******************************************************************************
+ * xg_private.c
+ *
+ * Helper functions for the rest of the library.
+ */
+
+#include <stdlib.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+
+/*
+ * Read a kernel image file into a newly malloc()ed buffer.
+ *
+ * The file is opened read-only, *size is set from xc_get_filesz(), and the
+ * contents are read through zlib's gzread().  The read must deliver exactly
+ * *size bytes, otherwise the buffer is freed and NULL is returned.
+ *
+ * @parm filename path of the kernel image to read
+ * @parm size     out: the value returned by xc_get_filesz() for the file
+ * @return the image buffer, or NULL on any failure (an error is printed via
+ *         PERROR).  NOTE(review): presumably the caller free()s the buffer.
+ */
+char *xc_read_kernel_image(const char *filename, unsigned long *size)
+{
+    int kernel_fd = -1;
+    gzFile kernel_gfd = NULL;
+    char *image = NULL;
+    int bytes;    /* gzread() returns int; a negative value is an error */
+
+    if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
+    {
+        PERROR("Could not open kernel image");
+        goto out;
+    }
+
+    if ( (*size = xc_get_filesz(kernel_fd)) == 0 )
+    {
+        PERROR("Could not read kernel image");
+        goto out;
+    }
+
+    if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
+    {
+        PERROR("Could not allocate decompression state for kernel image");
+        goto out;
+    }
+
+    if ( (image = malloc(*size)) == NULL )
+    {
+        PERROR("Could not allocate memory for kernel image");
+        goto out;
+    }
+
+    bytes = gzread(kernel_gfd, image, *size);
+    if ( (bytes < 0) || ((unsigned long)bytes != *size) )
+    {
+        PERROR("Error reading kernel image, could not"
+               " read the whole image (%d != %lu).", bytes, *size);
+        free(image);
+        image = NULL;
+    }
+
+ out:
+    /* Once gzdopen() has succeeded, gzclose() also closes kernel_fd. */
+    if ( kernel_gfd != NULL )
+        gzclose(kernel_gfd);
+    else if ( kernel_fd >= 0 )
+        close(kernel_fd);
+    return image;
+}
+
+/*******************/
+
+/*
+ * Issue one mmuext operation with command `type' on frame `mfn' for domain
+ * `dom'.  Returns 0 on success and 1 when xc_mmuext_op() reports a failure.
+ */
+int pin_table(
+    int xc_handle, unsigned int type, unsigned long mfn, domid_t dom)
+{
+    struct mmuext_op pin_op;
+
+    pin_op.mfn = mfn;
+    pin_op.cmd = type;
+
+    return (xc_mmuext_op(xc_handle, &pin_op, 1, dom) < 0) ? 1 : 0;
+}
+
+/*
+ * Checksum one page: add up its PAGE_SIZE bytes as unsigned longs in a
+ * 64-bit total, then XOR the total with its own upper 32 bits.  Shared
+ * between save and restore, and may generally be useful.
+ */
+unsigned long csum_page (void * page)
+{
+    const unsigned long *word = page;
+    const unsigned long *end = word + (PAGE_SIZE/sizeof(unsigned long));
+    unsigned long long total = 0;
+
+    while ( word < end )
+        total += *word++;
+
+    return total ^ (total>>32);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xg_private.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xg_private.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,170 @@
+#ifndef XG_PRIVATE_H
+#define XG_PRIVATE_H
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "xenctrl.h"
+
+#include <xen/linux/privcmd.h>
+
+char *xc_read_kernel_image(const char *filename, unsigned long *size);
+unsigned long csum_page (void * page);
+
+#define _PAGE_PRESENT 0x001
+#define _PAGE_RW 0x002
+#define _PAGE_USER 0x004
+#define _PAGE_PWT 0x008
+#define _PAGE_PCD 0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY 0x040
+#define _PAGE_PAT 0x080
+#define _PAGE_PSE 0x080
+#define _PAGE_GLOBAL 0x100
+
+#if defined(__i386__)
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+#define L1_PAGETABLE_SHIFT_PAE 12
+#define L2_PAGETABLE_SHIFT_PAE 21
+#define L3_PAGETABLE_SHIFT_PAE 30
+#elif defined(__x86_64__)
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+#define L4_PAGETABLE_SHIFT 39
+#endif
+
+#if defined(__i386__)
+#define ENTRIES_PER_L1_PAGETABLE 1024
+#define ENTRIES_PER_L2_PAGETABLE 1024
+#define L1_PAGETABLE_ENTRIES_PAE 512
+#define L2_PAGETABLE_ENTRIES_PAE 512
+#define L3_PAGETABLE_ENTRIES_PAE 4
+#elif defined(__x86_64__)
+#define L1_PAGETABLE_ENTRIES 512
+#define L2_PAGETABLE_ENTRIES 512
+#define L3_PAGETABLE_ENTRIES 512
+#define L4_PAGETABLE_ENTRIES 512
+#endif
+
+#define PAGE_SHIFT XC_PAGE_SHIFT
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+typedef u32 l1_pgentry_32_t;
+typedef u32 l2_pgentry_32_t;
+typedef u64 l1_pgentry_64_t;
+typedef u64 l2_pgentry_64_t;
+typedef u64 l3_pgentry_64_t;
+typedef unsigned long l1_pgentry_t;
+typedef unsigned long l2_pgentry_t;
+#if defined(__x86_64__)
+typedef unsigned long l3_pgentry_t;
+typedef unsigned long l4_pgentry_t;
+#endif
+
+#if defined(__i386__)
+#define l1_table_offset(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
+#define l2_table_offset(_a) \
+ ((_a) >> L2_PAGETABLE_SHIFT)
+#define l1_table_offset_pae(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT_PAE) & (L1_PAGETABLE_ENTRIES_PAE - 1))
+#define l2_table_offset_pae(_a) \
+ (((_a) >> L2_PAGETABLE_SHIFT_PAE) & (L2_PAGETABLE_ENTRIES_PAE - 1))
+#define l3_table_offset_pae(_a) \
+ (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
+#elif defined(__x86_64__)
+#define l1_table_offset(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
+#define l2_table_offset(_a) \
+ (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#define l3_table_offset(_a) \
+ (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
+#define l4_table_offset(_a) \
+ (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
+#endif
+
+/*
+ * ERROR(fmt, args...): print "ERROR: " followed by the formatted message
+ * and a newline to stderr.  errno is saved before the fprintf() call and
+ * restored afterwards, so the caller still sees its original value.
+ */
+#define ERROR(_m, _a...) \
+do { \
+ int __saved_errno = errno; \
+ fprintf(stderr, "ERROR: " _m "\n" , ## _a ); \
+ errno = __saved_errno; \
+} while (0)
+
+
+/*
+ * PERROR(fmt, args...): like ERROR(), but the message is followed by
+ * " (<errno> = <strerror text>)" for the errno value in effect on entry.
+ * errno is restored to that value after printing.
+ */
+#define PERROR(_m, _a...) \
+do { \
+ int __saved_errno = errno; \
+ fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a , \
+ __saved_errno, strerror(__saved_errno)); \
+ errno = __saved_errno; \
+} while (0)
+
+
+struct domain_setup_info
+{
+ unsigned long v_start;
+ unsigned long v_end;
+ unsigned long v_kernstart;
+ unsigned long v_kernend;
+ unsigned long v_kernentry;
+
+ unsigned int load_symtab;
+ unsigned int pae_kernel;
+ unsigned long symtab_addr;
+ unsigned long symtab_len;
+};
+
+typedef int (*parseimagefunc)(char *image, unsigned long image_size,
+ struct domain_setup_info *dsi);
+typedef int (*loadimagefunc)(char *image, unsigned long image_size, int xch,
+ u32 dom, unsigned long *parray,
+ struct domain_setup_info *dsi);
+
+struct load_funcs
+{
+ parseimagefunc parseimage;
+ loadimagefunc loadimage;
+};
+
+#define mfn_mapper_queue_size 128
+
+typedef struct mfn_mapper {
+ int xc_handle;
+ int size;
+ int prot;
+ int error;
+ int max_queue_size;
+ void * addr;
+ privcmd_mmap_t ioctl;
+
+} mfn_mapper_t;
+
+unsigned long xc_get_m2p_start_mfn (int xc_handle);
+
+int xc_copy_to_domain_page(int xc_handle, u32 domid,
+ unsigned long dst_pfn, void *src_page);
+
+unsigned long xc_get_filesz(int fd);
+
+void xc_map_memcpy(unsigned long dst, char *src, unsigned long size,
+ int xch, u32 dom, unsigned long *parray,
+ unsigned long vstart);
+
+int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
+ domid_t dom);
+
+/* image loading */
+int probe_elf(char *image, unsigned long image_size, struct load_funcs *funcs);
+int probe_bin(char *image, unsigned long image_size, struct load_funcs *funcs);
+int probe_aout9(char *image, unsigned long image_size,
+                struct load_funcs *funcs);
+
+#endif
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/example.txt
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/example.txt Thu Aug 25 22:53:20 2005
@@ -0,0 +1,269 @@
+##
+# example.txt <description to the xen access control architecture>
+#
+# Author:
+# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx>
+#
+#
+# This file introduces the tools used to manage policies
+# and to label domains and resources.
+##
+
+We will show how to install and use the chwall_ste policy.
+Other policies work similarly. Feedback welcome!
+
+
+
+1. Using secpol_xml2bin to translate the chwall_ste policy:
+===========================================================
+
+#tools/security/secpol_xml2bin chwall_ste
+
+Successful execution should print:
+
+ [root@laptopxn security]# ./secpol_xml2bin chwall_ste
+ Validating label file policies/chwall_ste/chwall_ste-security_label_template.xml...
+ XML Schema policies/security_policy.xsd valid.
+ Validating policy file policies/chwall_ste/chwall_ste-security_policy.xml...
+ XML Schema policies/security_policy.xsd valid.
+ Creating ssid mappings ...
+ Creating label mappings ...
+ Max chwall labels: 7
+ Max chwall-types: 4
+ Max chwall-ssids: 5
+ Max ste labels: 14
+ Max ste-types: 6
+ Max ste-ssids: 10
+
+The tool looks in directory policies/chwall_ste for
+the label and policy files.
+
+The default policy directory structure under tools/security looks like:
+
+policies
+|-- security_policy.xsd
+|-- chwall
+| |-- chwall-security_label_template.xml
+| `-- chwall-security_policy.xml
+|-- chwall_ste
+| |-- chwall_ste-security_label_template.xml
+| `-- chwall_ste-security_policy.xml
+|-- null
+| |-- null-security_label_template.xml
+| `-- null-security_policy.xml
+`-- ste
+ |-- ste-security_label_template.xml
+ `-- ste-security_policy.xml
+
+policies/security_policy.xsd contains the schema against which both the
+label-template and the policy files must validate during translation.
+
+policies/chwall_ste/chwall_ste-security_policy.xml defines the
+policies and the types known to the policies.
+
+policies/chwall_ste/chwall_ste-security_label_template.xml contains
+label definitions that group chwall and ste types together and make
+them easier to use for users
+
+After executing the above secpol_xml2bin command, you will find 2 new
+files in the policies/chwall_ste sub-directory:
+
+policies/chwall_ste/chwall_ste.map ... this file includes the mapping
+of names from the xml files into their binary code representation.
+
+policies/chwall_ste/chwall_ste.bin ... this is the binary policy file,
+the result of parsing the xml files and using the mapping to extract a
+binary version that can be loaded into the hypervisor.
+
+
+
+2. Loading and activating the policy:
+=====================================
+
+We assume that xen is already configured to use the chwall_ste policy;
+please refer to install.txt for instructions.
+
+To activate the policy from the command line (assuming that the
+currently established policy is the minimal boot-policy that is
+hard-coded into the hypervisor):
+
+# ./secpol_tool loadpolicy policies/chwall_ste/chwall_ste.bin
+
+To activate the policy at next reboot:
+
+# cp policies/chwall_ste/chwall_ste.bin /boot
+
+Add a module line to your /boot/grub/grub.conf Xen entry.
+My boot entry with chwall_ste enabled looks like this:
+
+ title Xen (2.6.12)
+ root (hd0,5)
+ kernel /boot/xen.gz dom0_mem=1200000 console=vga
+ module /boot/vmlinuz-2.6.12-xen0 ro root=/dev/hda6 rhgb
+ module /boot/initrd-2.6.12-xen0.img
+ module /boot/chwall_ste.bin
+
+This tells the grub boot-loader to load the binary policy, which
+the hypervisor will recognize. The hypervisor will then establish
+this binary policy during boot instead of the minimal policy that
+is hardcoded as default.
+
+If you have any trouble here, make sure you have the access control
+framework enabled (see: install.txt).
+
+
+
+3. Labeling domains:
+====================
+
+a) Labeling Domain0:
+
+The chwall_ste-security_label_template.xml file includes an attribute
+"bootstrap", which is set to the label name that will be assigned to
+Dom0 (this label will be mapped to ssidref 1/1, the default for Dom0).
+
+b) Labeling User Domains:
+
+Use the script tools/security/setlabel.sh to choose a label and to
+assign labels to user domains.
+
+To show available labels for the chwall_ste policy:
+
+#tools/security/setlabel.sh -l
+
+lists all available labels. For the default chwall_ste it should print
+the following:
+
+ [root@laptopxn security]# ./setlabel.sh -l chwall_ste
+ The following labels are available:
+ dom_SystemManagement
+ dom_HomeBanking
+ dom_Fun
+ dom_BoincClient
+ dom_StorageDomain
+ dom_NetworkDomain
+
+You need to have compiled the policy beforehand so that a .map file
+exists. Setlabel.sh uses the mapping file created throughout the
+policy translation to translate a user-friendly label string into a
+ssidref-number that is eventually used by the Xen hypervisor.
+
+We distinguish two kinds of labels: a) VM labels (for domains) and
+b) RES labels (for resources). We are currently working on support for
+resource labeling but will focus here on VM labels.
+
+Setlabel.sh only prints VM labels (which we have prefixed with "dom_")
+since only those are used at this time.
+
+If you would like to assign the dom_HomeBanking label to one of your
+user domains (which you hopefully keep clean), look at an example
+domain configuration homebanking.xm:
+
+ #------HOMEBANKING---------
+ kernel = "/boot/vmlinuz-2.6.12-xenU"
+ ramdisk="/boot/U1_ramdisk.img"
+ memory = 65
+ name = "test34"
+ cpu = -1 # leave to Xen to pick
+ # Number of network interfaces. Default is 1.
+ nics=1
+ dhcp="dhcp"
+ #-------------------------
+
+Now we label this domain
+
+[root@laptopxn security]# ./setlabel.sh homebanking.xm dom_HomeBanking chwall_ste
+Mapped label 'dom_HomeBanking' to ssidref '0x00020002'.
+
+The domain configuration may now look like:
+
+ [root@laptopxn security]# cat homebanking.xm
+ #------HOMEBANKING---------
+ kernel = "/boot/vmlinuz-2.6.12-xenU"
+ ramdisk="/boot/U1_ramdisk.img"
+ memory = 65
+ name = "test34"
+ cpu = -1 # leave to Xen to pick
+ # Number of network interfaces. Default is 1.
+ nics=1
+ dhcp="dhcp"
+ #-------------------------
+ #ACM_POLICY=chwall_ste-security_policy.xml
+ #ACM_LABEL=dom_HomeBanking
+ ssidref = 0x00020002
+
+You can see 3 new entries, two of which are comments. The only value
+that the hypervisor cares about is the ssidref that will reference
+those types assigned to this label. You can look them up in the
+xml label-template file for the chwall_ste policy.
+
+This script will eventually move into the domain management and will
+be called when the domain is instantiated. For now, the setlabel
+script must be run on domains whenever the policy files change since
+the mapping between label names and ssidrefs can change in this case.
+
+
+4. Starting a labeled domain
+============================
+
+Now, start the domain:
+ #xm create -c homebanking.xm
+
+
+If you label another domain configuration as dom_Fun and try to start
+it afterwards, its start will fail. Why?
+
+Because the running homebanking domain has the chinese wall type
+"cw_Sensitive". The new domain dom_Fun has the chinese wall label
+"cw_Distrusted". This domain is not allowed to run simultaneously
+because of the defined conflict set
+
+ <conflictset name="Protection1">
+ <type>cw_Sensitive</type>
+ <type>cw_Distrusted</type>
+ </conflictset>
+
+(in policies/chwall_ste/chwall_ste-security_policy.xml), which says
+that only one of the types cw_Sensitive and cw_Distrusted can run at a
+time.
+
+If you save or shutdown the HomeBanking domain, you will be able to
+start the "Fun" domain. You can look into the Xen log to see if a
+domain was denied to start because of the access control framework
+with the command 'xm dmesg'.
+
+It is important (and usually non-trivial) to define the labels in a
+way that the semantics of the labels are enforced and supported by the
+types and the conflict sets.
+
+Note: While the chinese wall policy enforcement is complete, the type
+enforcement is currently enforced in the Xen hypervisor
+only. Therefore, only point-to-point sharing with regard to the type
+enforcement is currently controlled. We are working on enhancements to
+Dom0 that enforce types also for network traffic that is routed
+through Dom0 and on the enforcement of resource labeling when binding
+resources to domains (e.g., enforcing types between domains and
+hardware resources, such as disk partitions).
+
+
+5. Adding your own policies
+===========================
+
+Writing your own policy (e.g. "mypolicy") requires the following:
+
+a) the policy definition (types etc.) file
+b) the label template definition (labels etc.) file
+
+If your policy name is "mypolicy", you need to create a
+subdirectory mypolicy in tools/security/policies.
+
+Then you create
+tools/security/policies/mypolicy/mypolicy-security_policy.xml and
+tools/security/policies/mypolicy/mypolicy-security_label_template.xml.
+
+You need to keep to the schema as defined in
+tools/security/policies/security_policy.xsd since the translation tool
+secpol_xml2bin is written against this schema.
+
+If you keep to the security policy schema, then you can use all the
+tools described above. Refer to install.txt to install it.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/install.txt
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/install.txt Thu Aug 25 22:53:20 2005
@@ -0,0 +1,67 @@
+##
+# install.txt <description to the xen access control architecture>
+#
+# Author:
+# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx>
+#
+#
+# This file shows how to activate and install the access control
+# framework.
+##
+
+
+INSTALLING A SECURITY POLICY IN XEN
+===================================
+
+By default, the access control architecture is disabled in Xen. To
+enable the access control architecture in Xen follow the steps below.
+This description assumes that you want to install the Chinese Wall and
+Simple Type Enforcement policy. Some file names need to be replaced
+below to activate the Chinese Wall OR the Type Enforcement policy
+exclusively (chwall_ste --> {chwall, ste}).
+
+1. enable access control in Xen
+ # cd "xen_root"
+ # edit/xemacs/vi Config.mk
+
+ change the line:
+ ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY
+
+ to:
+ ACM_USE_SECURITY_POLICY ?= ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
+
+ # make all
+ # ./install.sh
+
+2. compile the policy from xml to a binary format that can be loaded
+ into the hypervisor for enforcement
+ # cd tools/security
+ # make
+
+ manual steps (alternative to make boot_install):
+ #./secpol_xml2bin chwall_ste
+ #cp policies/chwall_ste/chwall_ste.bin /boot
+ #edit /boot/grub/grub.conf
+ add the following line to your xen boot entry:
+ "module chwall_ste.bin"
+
+ alternatively, you can try our automatic translation and
+ installation of the policy:
+ # make boot_install
+
+ [we try hard to do the right thing to the right boot entry but
+ please verify boot entry in /boot/grub/grub.conf afterwards;
+ your xen boot entry should have an additional module line
+ specifying a chwall_ste.bin file with the correct directory
+ (e.g. "/" or "/boot").]
+
+
+3. reboot into the newly compiled hypervisor
+
+ after boot
+ #xm dmesg should show an entry about the policy being loaded
+ during the boot process
+
+ #tools/security/secpol_tool getpolicy
+ should print the new chwall_ste binary policy representation
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/chwall/chwall-security_label_template.xml
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/chwall/chwall-security_label_template.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,76 @@
+<?xml version="1.0"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com -->
+<!-- This file defines the security labels, which can -->
+<!-- be attached to Domains and resources. Based on -->
+<!-- these labels, the access control module decides -->
+<!-- about sharing between Domains and about access -->
+<!-- of Domains to real resources. -->
+
+<SecurityLabelTemplate
+ xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+ <LabelHeader>
+ <Name>chwall-security_label_template</Name>
+ <Date>2005-08-10</Date>
+ <PolicyName>
+ <Url>chwall-security_policy.xml</Url>
+ <Reference>abcdef123456abcdef</Reference>
+ </PolicyName>
+ </LabelHeader>
+
+ <SubjectLabels bootstrap="dom_SystemManagement">
+ <!-- single ste typed domains -->
+ <!-- ACM enforces that only domains with -->
+ <!-- the same type can share information -->
+ <!-- -->
+ <!-- Bootstrap label is assigned to Dom0 -->
+ <VirtualMachineLabel>
+ <Name>dom_HomeBanking</Name>
+ <ChineseWallTypes>
+ <Type>cw_Sensitive</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <Name>dom_Fun</Name>
+ <ChineseWallTypes>
+ <Type>cw_Distrusted</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <!-- donating some cycles to seti@home -->
+ <Name>dom_BoincClient</Name>
+ <ChineseWallTypes>
+ <Type>cw_Isolated</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <!-- Domains with multiple ste types services; such domains -->
+ <!-- must keep the types inside their domain safely confined. -->
+ <VirtualMachineLabel>
+ <Name>dom_SystemManagement</Name>
+ <ChineseWallTypes>
+ <Type>cw_SystemManagement</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <!-- serves persistent storage to other domains -->
+ <Name>dom_StorageDomain</Name>
+ <ChineseWallTypes>
+ <Type>cw_SystemManagement</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <!-- serves network access to other domains -->
+ <Name>dom_NetworkDomain</Name>
+ <ChineseWallTypes>
+ <Type>cw_SystemManagement</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+ </SubjectLabels>
+</SecurityLabelTemplate>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/chwall/chwall-security_policy.xml
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/chwall/chwall-security_policy.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com -->
+<!-- This file defines the security policies, which -->
+<!-- can be enforced by the Xen Access Control Module. -->
+<!-- Currently: Chinese Wall and Simple Type Enforcement-->
+<SecurityPolicyDefinition xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+<PolicyHeader>
+ <Name>chwall-security_policy</Name>
+ <Date>2005-08-10</Date>
+</PolicyHeader>
+<!-- -->
+<!-- example of a chinese wall type definition -->
+<!-- along with its conflict sets -->
+<!-- (types in a conflict set are exclusive, i.e. -->
+<!-- once a Domain with one type of a set is -->
+<!-- running, no other Domain with another type -->
+<!-- of the same conflict set can start.) -->
+ <ChineseWall priority="PrimaryPolicyComponent">
+ <ChineseWallTypes>
+ <Type>cw_SystemManagement</Type>
+ <Type>cw_Sensitive</Type>
+ <Type>cw_Isolated</Type>
+ <Type>cw_Distrusted</Type>
+ </ChineseWallTypes>
+
+ <ConflictSets>
+ <Conflict name="Protection1">
+ <Type>cw_Sensitive</Type>
+ <Type>cw_Distrusted</Type>
+ </Conflict>
+ </ConflictSets>
+ </ChineseWall>
+</SecurityPolicyDefinition>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/chwall_ste/chwall_ste-security_label_template.xml
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/chwall_ste/chwall_ste-security_label_template.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,167 @@
+<?xml version="1.0"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com -->
+<!-- This file defines the security labels, which can -->
+<!-- be attached to Domains and resources. Based on -->
+<!-- these labels, the access control module decides -->
+<!-- about sharing between Domains and about access -->
+<!-- of Domains to real resources. -->
+
+<SecurityLabelTemplate
+ xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+ <LabelHeader>
+ <Name>chwall_ste-security_label_template</Name>
+ <Date>2005-08-10</Date>
+ <PolicyName>
+ <Url>chwall_ste-security_policy.xml</Url>
+ <Reference>abcdef123456abcdef</Reference>
+ </PolicyName>
+ </LabelHeader>
+
+ <SubjectLabels bootstrap="dom_SystemManagement">
+ <!-- single ste typed domains -->
+ <!-- ACM enforces that only domains with -->
+ <!-- the same type can share information -->
+ <!-- -->
+ <!-- Bootstrap label is assigned to Dom0 -->
+ <VirtualMachineLabel>
+ <Name>dom_HomeBanking</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_PersonalFinances</Type>
+ </SimpleTypeEnforcementTypes>
+
+ <ChineseWallTypes>
+ <Type>cw_Sensitive</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <Name>dom_Fun</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_InternetInsecure</Type>
+ </SimpleTypeEnforcementTypes>
+
+ <ChineseWallTypes>
+ <Type>cw_Distrusted</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <!-- donating some cycles to seti@home -->
+ <Name>dom_BoincClient</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_DonatedCycles</Type>
+ </SimpleTypeEnforcementTypes>
+
+ <ChineseWallTypes>
+ <Type>cw_Isolated</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <!-- Domains with multiple ste types services; such domains -->
+ <!-- must keep the types inside their domain safely confined. -->
+ <VirtualMachineLabel>
+ <Name>dom_SystemManagement</Name>
+ <SimpleTypeEnforcementTypes>
+ <!-- since dom0 needs access to every domain and -->
+ <!-- resource right now ... -->
+ <Type>ste_SystemManagement</Type>
+ <Type>ste_PersonalFinances</Type>
+ <Type>ste_InternetInsecure</Type>
+ <Type>ste_DonatedCycles</Type>
+ <Type>ste_PersistentStorageA</Type>
+ <Type>ste_NetworkAdapter0</Type>
+ </SimpleTypeEnforcementTypes>
+
+ <ChineseWallTypes>
+ <Type>cw_SystemManagement</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <!-- serves persistent storage to other domains -->
+ <Name>dom_StorageDomain</Name>
+ <SimpleTypeEnforcementTypes>
+ <!-- access right to the resource (hard drive a) -->
+ <Type>ste_PersistentStorageA</Type>
+ <!-- can serve following types -->
+ <Type>ste_PersonalFinances</Type>
+ <Type>ste_InternetInsecure</Type>
+ </SimpleTypeEnforcementTypes>
+
+ <ChineseWallTypes>
+ <Type>cw_SystemManagement</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <!-- serves network access to other domains -->
+ <Name>dom_NetworkDomain</Name>
+ <SimpleTypeEnforcementTypes>
+ <!-- access right to the resource (ethernet card) -->
+ <Type>ste_NetworkAdapter0</Type>
+ <!-- can serve following types -->
+ <Type>ste_PersonalFinances</Type>
+ <Type>ste_InternetInsecure</Type>
+ <Type>ste_DonatedCycles</Type>
+ </SimpleTypeEnforcementTypes>
+
+ <ChineseWallTypes>
+ <Type>cw_SystemManagement</Type>
+ </ChineseWallTypes>
+ </VirtualMachineLabel>
+ </SubjectLabels>
+
+ <ObjectLabels>
+ <ResourceLabel>
+ <Name>res_ManagementResource</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_SystemManagement</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_HardDrive (hda)</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_PersistentStorageA</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_LogicalDiskPartition1 (hda1)</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_PersonalFinances</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_LogicalDiskPartition2 (hda2)</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_InternetInsecure</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_EthernetCard</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_NetworkAdapter0</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_SecurityToken</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_PersonalFinances</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_GraphicsAdapter</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_SystemManagement</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+ </ObjectLabels>
+</SecurityLabelTemplate>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/chwall_ste/chwall_ste-security_policy.xml
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/chwall_ste/chwall_ste-security_policy.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com -->
+<!-- This file defines the security policies, which -->
+<!-- can be enforced by the Xen Access Control Module. -->
+<!-- Currently: Chinese Wall and Simple Type Enforcement-->
+<SecurityPolicyDefinition xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+<PolicyHeader>
+ <Name>chwall_ste-security_policy</Name>
+ <Date>2005-08-10</Date>
+</PolicyHeader>
+<!-- -->
+<!-- example of a simple type enforcement policy definition -->
+<!-- -->
+ <SimpleTypeEnforcement>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_SystemManagement</Type> <!-- machine/security management -->
+ <Type>ste_PersonalFinances</Type> <!-- personal finances -->
+ <Type>ste_InternetInsecure</Type> <!-- games, active X, etc. -->
+ <Type>ste_DonatedCycles</Type> <!-- donation to BOINC/seti@home -->
+ <Type>ste_PersistentStorageA</Type> <!-- domain managing the harddrive A-->
+ <Type>ste_NetworkAdapter0</Type> <!-- type of the domain managing ethernet adapter 0-->
+ </SimpleTypeEnforcementTypes>
+ </SimpleTypeEnforcement>
+<!-- -->
+<!-- example of a chinese wall type definition -->
+<!-- along with its conflict sets -->
+<!-- (types in a conflict set are exclusive, i.e. -->
+<!-- once a Domain with one type of a set is -->
+<!-- running, no other Domain with another type -->
+<!-- of the same conflict set can start.) -->
+ <ChineseWall priority="PrimaryPolicyComponent">
+ <ChineseWallTypes>
+ <Type>cw_SystemManagement</Type>
+ <Type>cw_Sensitive</Type>
+ <Type>cw_Isolated</Type>
+ <Type>cw_Distrusted</Type>
+ </ChineseWallTypes>
+
+ <ConflictSets>
+ <Conflict name="Protection1">
+ <Type>cw_Sensitive</Type>
+ <Type>cw_Distrusted</Type>
+ </Conflict>
+ </ConflictSets>
+ </ChineseWall>
+</SecurityPolicyDefinition>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/null/null-security_label_template.xml
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/null/null-security_label_template.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com -->
+<!-- This file defines the security labels, which can -->
+<!-- be attached to Domains and resources. Based on -->
+<!-- these labels, the access control module decides -->
+<!-- about sharing between Domains and about access -->
+<!-- of Domains to real resources. -->
+
+<SecurityLabelTemplate
+ xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+ <LabelHeader>
+ <Name>null-security_label_template</Name>
+
+ <Date>2005-08-10</Date>
+ <PolicyName>
+ <Url>null-security_policy.xml</Url>
+
+ <Reference>abcdef123456abcdef</Reference>
+ </PolicyName>
+ </LabelHeader>
+</SecurityLabelTemplate>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/null/null-security_policy.xml
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/null/null-security_policy.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com -->
+<!-- This file defines the security policies, which -->
+<!-- can be enforced by the Xen Access Control Module. -->
+<!-- Currently: Chinese Wall and Simple Type Enforcement-->
+<SecurityPolicyDefinition xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+<PolicyHeader>
+ <Name>null-security_policy</Name>
+ <Date>2005-08-10</Date>
+</PolicyHeader>
+</SecurityPolicyDefinition>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/security_policy.xsd
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/security_policy.xsd Thu Aug 25 22:53:20 2005
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Ray Valdez, Reiner Sailer {rvaldez,sailer}@us.ibm.com -->
+<!-- This file defines the schema, which is used to define -->
+<!-- the security policy and the security labels in Xen. -->
+
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" targetNamespace="http://www.ibm.com" xmlns="http://www.ibm.com" elementFormDefault="qualified">
+ <xsd:element name="SecurityPolicyDefinition">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="PolicyHeader" minOccurs="0" maxOccurs="1"></xsd:element>
+ <xsd:element ref="SimpleTypeEnforcement" minOccurs="0" maxOccurs="1"></xsd:element>
+ <xsd:element ref="ChineseWall" minOccurs="0" maxOccurs="1"></xsd:element>
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="SecurityLabelTemplate">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="LabelHeader" minOccurs="1" maxOccurs="1"></xsd:element>
+ <xsd:element name="SubjectLabels" minOccurs="0" maxOccurs="1">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="VirtualMachineLabel" minOccurs="1" maxOccurs="unbounded"></xsd:element>
+ </xsd:sequence>
+ <xsd:attribute name="bootstrap" type="xsd:string" use="required"></xsd:attribute>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="ObjectLabels" minOccurs="0" maxOccurs="1">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="ResourceLabel" minOccurs="1" maxOccurs="unbounded"></xsd:element>
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="PolicyHeader">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="Name" minOccurs="1" maxOccurs="1" />
+ <xsd:element ref="Date" minOccurs="1" maxOccurs="1" />
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="LabelHeader">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="Name"></xsd:element>
+ <xsd:element ref="Date" minOccurs="1" maxOccurs="1"></xsd:element>
+ <xsd:element ref="PolicyName" minOccurs="1" maxOccurs="1"></xsd:element>
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="SimpleTypeEnforcement">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="SimpleTypeEnforcementTypes" />
+ </xsd:sequence>
+ <xsd:attribute name="priority" type="PolicyOrder" use="optional"></xsd:attribute>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="ChineseWall">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="ChineseWallTypes" />
+ <xsd:element ref="ConflictSets" />
+ </xsd:sequence>
+ <xsd:attribute name="priority" type="PolicyOrder" use="optional"></xsd:attribute>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="ChineseWallTypes">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element maxOccurs="unbounded" minOccurs="1" ref="Type" />
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="ConflictSets">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element maxOccurs="unbounded" minOccurs="1" ref="Conflict" />
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="SimpleTypeEnforcementTypes">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element maxOccurs="unbounded" minOccurs="1" ref="Type" />
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="Conflict">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element maxOccurs="unbounded" minOccurs="1" ref="Type" />
+ </xsd:sequence>
+ <xsd:attribute name="name" type="xsd:string" use="optional"></xsd:attribute>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="VirtualMachineLabel">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="Name"></xsd:element>
+ <xsd:element ref="SimpleTypeEnforcementTypes" minOccurs="0" maxOccurs="unbounded" />
+ <xsd:element ref="ChineseWallTypes" minOccurs="0" maxOccurs="unbounded" />
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="ResourceLabel">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="Name"></xsd:element>
+ <xsd:element ref="SimpleTypeEnforcementTypes" minOccurs="0" maxOccurs="unbounded" />
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="PolicyName">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element ref="Url" />
+ <xsd:element ref="Reference" />
+ </xsd:sequence>
+ </xsd:complexType>
+ </xsd:element>
+ <xsd:element name="Date" type="xsd:string" />
+ <xsd:element name="Name" type="xsd:string" />
+ <xsd:element name="Type" type="xsd:string" />
+ <xsd:element name="Reference" type="xsd:string" />
+ <xsd:element name="Url"></xsd:element>
+
+ <xsd:simpleType name="PolicyOrder">
+ <xsd:restriction base="xsd:string">
+ <xsd:enumeration value="PrimaryPolicyComponent"></xsd:enumeration>
+ </xsd:restriction>
+ </xsd:simpleType>
+
+</xsd:schema>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/ste/ste-security_label_template.xml
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/ste/ste-security_label_template.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,143 @@
+<?xml version="1.0"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com -->
+<!-- This file defines the security labels, which can -->
+<!-- be attached to Domains and resources. Based on -->
+<!-- these labels, the access control module decides -->
+<!-- about sharing between Domains and about access -->
+<!-- of Domains to real resources. -->
+
+<SecurityLabelTemplate
+ xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+ <LabelHeader>
+ <Name>ste-security_label_template</Name>
+ <Date>2005-08-10</Date>
+ <PolicyName>
+ <Url>ste-security_policy.xml</Url>
+ <Reference>abcdef123456abcdef</Reference>
+ </PolicyName>
+ </LabelHeader>
+
+ <SubjectLabels bootstrap="dom_SystemManagement">
+ <!-- single ste typed domains -->
+ <!-- ACM enforces that only domains with -->
+ <!-- the same type can share information -->
+ <!-- -->
+ <!-- Bootstrap label is assigned to Dom0 -->
+ <VirtualMachineLabel>
+ <Name>dom_HomeBanking</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_PersonalFinances</Type>
+ </SimpleTypeEnforcementTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <Name>dom_Fun</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_InternetInsecure</Type>
+ </SimpleTypeEnforcementTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <!-- donating some cycles to seti@home -->
+ <Name>dom_BoincClient</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_DonatedCycles</Type>
+ </SimpleTypeEnforcementTypes>
+ </VirtualMachineLabel>
+
+ <!-- Domains with multiple ste types services; such domains -->
+ <!-- must keep the types inside their domain safely confined. -->
+ <VirtualMachineLabel>
+ <Name>dom_SystemManagement</Name>
+ <SimpleTypeEnforcementTypes>
+ <!-- since dom0 needs access to every domain and -->
+ <!-- resource right now ... -->
+ <Type>ste_SystemManagement</Type>
+ <Type>ste_PersonalFinances</Type>
+ <Type>ste_InternetInsecure</Type>
+ <Type>ste_DonatedCycles</Type>
+ <Type>ste_PersistentStorageA</Type>
+ <Type>ste_NetworkAdapter0</Type>
+ </SimpleTypeEnforcementTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <!-- serves persistent storage to other domains -->
+ <Name>dom_StorageDomain</Name>
+ <SimpleTypeEnforcementTypes>
+ <!-- access right to the resource (hard drive a) -->
+ <Type>ste_PersistentStorageA</Type>
+ <!-- can serve following types -->
+ <Type>ste_PersonalFinances</Type>
+ <Type>ste_InternetInsecure</Type>
+ </SimpleTypeEnforcementTypes>
+ </VirtualMachineLabel>
+
+ <VirtualMachineLabel>
+ <!-- serves network access to other domains -->
+ <Name>dom_NetworkDomain</Name>
+ <SimpleTypeEnforcementTypes>
+ <!-- access right to the resource (ethernet card) -->
+ <Type>ste_NetworkAdapter0</Type>
+ <!-- can serve following types -->
+ <Type>ste_PersonalFinances</Type>
+ <Type>ste_InternetInsecure</Type>
+ <Type>ste_DonatedCycles</Type>
+ </SimpleTypeEnforcementTypes>
+ </VirtualMachineLabel>
+ </SubjectLabels>
+
+ <ObjectLabels>
+ <ResourceLabel>
+ <Name>res_ManagementResource</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_SystemManagement</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_HardDrive (hda)</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_PersistentStorageA</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_LogicalDiskPartition1 (hda1)</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_PersonalFinances</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_LogicalDiskPartition2 (hda2)</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_InternetInsecure</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_EthernetCard</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_NetworkAdapter0</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_SecurityToken</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_PersonalFinances</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+
+ <ResourceLabel>
+ <Name>res_GraphicsAdapter</Name>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_SystemManagement</Type>
+ </SimpleTypeEnforcementTypes>
+ </ResourceLabel>
+ </ObjectLabels>
+</SecurityLabelTemplate>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/ste/ste-security_policy.xml
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/ste/ste-security_policy.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com -->
+<!-- This file defines the security policies, which -->
+<!-- can be enforced by the Xen Access Control Module. -->
+<!-- Currently: Chinese Wall and Simple Type Enforcement-->
+<SecurityPolicyDefinition xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+<PolicyHeader>
+ <Name>ste-security_policy</Name>
+ <Date>2005-08-10</Date>
+</PolicyHeader>
+<!-- -->
+<!-- example of a simple type enforcement policy definition -->
+<!-- -->
+ <SimpleTypeEnforcement>
+ <SimpleTypeEnforcementTypes>
+ <Type>ste_SystemManagement</Type> <!-- machine/security management -->
+ <Type>ste_PersonalFinances</Type> <!-- personal finances -->
+ <Type>ste_InternetInsecure</Type> <!-- games, active X, etc. -->
+ <Type>ste_DonatedCycles</Type> <!-- donation to BOINC/seti@home -->
+ <Type>ste_PersistentStorageA</Type> <!-- domain managing the harddrive A-->
+ <Type>ste_NetworkAdapter0</Type> <!-- type of the domain managing ethernet adapter 0-->
+ </SimpleTypeEnforcementTypes>
+ </SimpleTypeEnforcement>
+</SecurityPolicyDefinition>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policy.txt
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/policy.txt Thu Aug 25 22:53:20 2005
@@ -0,0 +1,405 @@
+##
+# policy.txt <description to the Xen access control architecture>
+#
+# Author:
+# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx>
+#
+#
+# This file gives an overview of the security policies currently
+# provided and also gives some reasoning about how to assign
+# labels to domains.
+##
+
+Xen access control policies
+
+
+General explanation of supported security policies:
+=====================================================
+
+We have implemented the mandatory access control architecture of our
+hypervisor security architecture (sHype) for the Xen hypervisor. It
+controls communication (in Xen: event channels, grant tables) between
+Virtual Machines (from here on called domains) and, through this, the
+virtual block devices, networking, and shared memory that are implemented
+on top of these communication means. While we have implemented the
+described policies and access control architecture for other
+hypervisor systems, we will describe below specifically its
+implementation and use in the Xen hypervisor. The policy enforcement
+is called mandatory regarding user domains since the policy is
+given by the security administration and enforced independently of the
+user domains by the Xen hypervisor in cooperation with the domain
+management.
+
+The access control architecture consists of three parts:
+
+i) The access control policy determines the "command set" of the ACM
+and the hooks with which they can be configured to constrain the
+sharing of virtual resources. The current access control architecture
+implemented for Xen supports two policies: Chinese Wall and Simple
+Type Enforcement, which we describe in turn below.
+
+
+ii) The actually enforced policy instantiation uses the policy
+language (i) to configure the Xen access control in a way that suits
+the specific application (home desktop environment, company desktop,
+Web server system, etc.). We have defined an exemplary policy
+instantiation for Chinese Wall (chwall policy) and Simple Type
+Enforcement (ste policy) for a desktop system. We offer these policies
+in combination since they are controlling orthogonal events.
+
+
+iii) The access control module (ACM) and related hooks are part of the
+core hypervisor and their controls cannot be bypassed by domains. The
+ACM and hooks are the active security components. We refer to
+publications that describe how access control is enforced in the Xen
+hypervisor using the ACM (access decision) and the hooks (decision
+enforcement) inserted into the setup of event channels and grant
+tables, and into domain operations (create, destroy, save, restore,
+migrate). These controls decide based on the active policy
+configuration (see i. and ii.) if the operation proceeds or if the
+operation is aborted (denied).
+
+
+In general, security policy instantiations in the Xen access control
+framework are defined by two files:
+
+a) a single "policy-name"-security_policy.xml file that defines the
+types known to the ACM and policy rules based on these types
+
+b) a single "policy-name"-security_label_template.xml file that
+defines labels based on known types
+
+Every security policy has its own sub-directory under
+"Xen-root"/tools/security/policies in order to simplify their
+management and the security policy tools. We will describe those files
+for our example policy (Chinese Wall and Simple Type Enforcement) in
+more detail as we go along. Eventually, we will move towards a system
+installation where the policies will reside under /etc.
+
+
+CHINESE WALL
+============
+
+The Chinese Wall policy enables the user to define "which workloads
+(domain payloads) cannot run on a single physical system at the same
+time". Why would we want to prevent workloads from running at the same
+time on the same system? This supports requirements that can (but
+don't have to) be rooted in the measure of trust into the isolation of
+different domains that share the same hardware. Since the access
+control architecture aims at high performance and non-intrusive
+implementation, it currently does not address covert (timing) channels
+and aims at medium assurance. Users can apply the Chinese Wall policy
+to guarantee an air-gap between very sensitive payloads both regarding
+covert information channels and regarding resource starvation.
+
+To enable the CW control, each domain is labeled with a set of Chinese
+Wall types and CW Conflict Sets are defined which include those CW
+types that cannot run simultaneously on the same hardware. This
+interpretation of conflict sets is the only policy rule for the Chinese
+Wall policy.
+
+This is enforced by controlling the start of domains according to
+their assigned CW workload types. Domains with Chinese Wall types that
+appear in a common conflict set are running mutually exclusive on a
+platform, i.e., once a domain with one of the cw-types of a conflict
+set is running, no domain with another cw-type of the same conflict
+set can start until the first domain is destroyed, paused, or migrated
+away from the physical system (this assumes that such a partition can
+no longer be observed). The idea is to assign cw-types according to
+the type of payload that a domain runs and to use the Chinese Wall
+policy to ensure that payload types can be differentiated by the
+hypervisor and can be prevented from being executed on the same system
+at the same time. Using the flexible CW policy maintains system
+consolidation and workload-balancing while introducing guaranteed
+constraints where necessary.
+
+
+Example of a Chinese Wall Policy Instantiation
+----------------------------------------------
+
+The file chwall-security_policy.xml defines the Chinese Wall types as
+well as the conflict sets for our example policy (you find it in the
+directory "xen_root"/tools/security/policies/chwall).
+
+It defines four Chinese Wall types (prefixed with cw_) with the
+following meaning:
+
+* cw_SystemsManagement is a type identifying workloads for systems
+management, e.g., domain management, device management, or hypervisor
+management.
+
+* cw_Sensitive is identifying workloads that are critical to the user
+for one reason or another.
+
+* cw_Distrusted is identifying workloads a user does not have much
+confidence in. E.g. a domain used for surfing in the internet without
+protection (i.e., active-X, java, java-script, executing web content)
+or for (Internet) Games should be typed this way.
+
+* cw_Isolated is identifying workloads that are supposedly isolated by
+use of the type enforcement policy (described below). For example, if
+a user wants to donate cycles to seti@home, she can setup a separate
+domain for a Boinc (http://boinc.ssl.berkeley.edu/) client, disable
+this domain from accessing the hard drive and from communicating to
+other local domains, and type it as cw_Isolated. We will look at a
+specific example later.
+
+The example policy uses the defined types to define one conflict set:
+Protection1 = {cw_Sensitive, cw_Distrusted}. This conflict set tells
+the hypervisor that once a domain typed as cw_Sensitive is running, a
+domain typed as cw_Distrusted cannot run concurrently (and the other
+way round). With this policy, a domain typed as cw_Isolated is allowed
+to run simultaneously with domains tagged as cw_Sensitive.
+
+Consequently, the access control module in the Xen hypervisor
+distinguishes 4 different workload types in this example
+policy. It is the user's responsibility to type the
+domains in a way that reflects the workloads of these domains and, in
+the case of cw_Isolated, its properties, e.g. by configuring the
+sharing capabilities of the domain accordingly by using the simple
+type enforcement policy.
+
+Users can define their own or change the existing example policy
+according to their working environment and security requirements. To
+do so, replace the file chwall-security_policy.xml with the new
+policy.
+
+
+SIMPLE TYPE ENFORCEMENT
+=======================
+
+The file ste-security_policy.xml defines the simple type enforcement
+types for our example policy (you find it in the directory
+"xen_root"/tools/security/policies/ste). The Simple Type Enforcement
+policy defines which domains can share information with which other
+domains. To this end, it controls
+
+i) inter-domain communication channels (e.g., network traffic, events,
+and shared memory).
+
+ii) access of domains to physical resources (e.g., hard drive, network
+cards, graphics adapter, keyboard).
+
+In order to enable the hypervisor to distinguish different domains and
+the user to express access rules, the simple type enforcement defines
+a set of types (ste_types).
+
+The policy defines that communication between domains is allowed if
+the domains share a common STE type. As with the chwall types, STE
+types should enable the differentiation of workloads. The simple type
+enforcement access control implementation in the hypervisor enforces
+that domains can only communicate (setup event channels, grant tables)
+if they share a common type, i.e., both domains have assigned at least
+one type in common. A domain can access a resource, if the domain and
+the resource share a common type. Hence, assigning STE types to
+domains and resources allows users to define constraints on sharing
+between domains and to keep sensitive data confined from distrusted
+domains.
+
+Domain <--> Domain Sharing
+''''''''''''''''''''''''''
+(implemented but its effective use requires factorization of Dom0)
+
+a) Domains with a single STE type (general user domains): Sharing
+between such domains is enforced entirely by the hypervisor access
+control. It is independent of the domains and does not require their
+co-operation.
+
+b) Domains with multiple STE types: One example is a domain that
+virtualizes a physical resource (e.g., hard drive) and serves it as
+multiple virtual resources (virtual block drives) to other domains of
+different types. The idea is that only a specific device domain has
+assigned the type required to access the physical hard-drive. Logical
+drives are then assigned the types of domains that have access to this
+logical drive. Since the Xen hypervisor cannot distinguish between the
+logical drives, the access control (type enforcement) is delegated to
+the device domain, which has access to the types of domains requesting
+to mount a logical drive as well as the types assigned to the
+different available logical drives.
+
+Currently in Xen, Dom0 controls all hardware, needs to communicate
+with all domains during their setup, and intercepts all communication
+between domains. Consequently, Dom0 needs to be assigned all types
+used and must be completely trusted to maintain the separation of
+information coming from domains with different STE types. Thus a
+refactoring of Dom0 is recommended for stronger confinement
+guarantees.
+
+Domain --> RESOURCES Access
+'''''''''''''''''''''''''''
+(current work)
+
+We define for each resource that we want to distinguish a separate STE
+type. Each STE type is assigned to the respective resource and to
+those domains that are allowed to access this resource. Type
+enforcement will guarantee that other domains cannot access this
+resource since they don't share the resource's STE type.
+
+Since in the current implementation of Xen, Dom0 controls access to
+all hardware (e.g., disk drives, network), Domain-->Resource access
+control enforcement must be implemented in Dom0. This is possible
+since Dom0 has access to both the domain configuration (including the
+domain STE types) and the resource configuration (including the
+resource STE types).
+
+For purposes of gaining higher assurance in the resulting system, it
+may be desirable to reduce the size of dom0 by adding one or more
+"device domains" (DDs). These DDs, e.g. providing storage or network
+access, can support one or more physical devices, and manage
+enforcement of MAC policy relevant for said devices. Security benefits
+come from the smaller size of these DDs, as they can be more easily
+audited than monolithic device driver domains. DDs can help to obtain
+maximum security benefit from sHype.
+
+
+Example of a Simple Type Enforcement Policy Instantiation
+---------------------------------------------------------
+
+We define the following types:
+
+* ste_SystemManagement identifies workloads (and domains that run
+them) that must share information to accomplish the management of the
+system
+
+* ste_PersonalFinances identifies workloads that are related to
+sensitive programs such as HomeBanking applications or safely
+configured web browsers for InternetBanking
+
+* ste_InternetInsecure identifies workloads that are very
+function-rich and unrestricted to offer for example an environment
+where internet games can run efficiently
+
+* ste_DonatedCycles identifies workloads that run on behalf of others,
+e.g. a Boinc client
+
+* ste_PersistentStorage identifies workloads that have direct access
+to persistent storage (e.g., hard drive)
+
+* ste_NetworkAccess identifies workloads that have direct access to
+network cards and related networks
+
+
+
+SECURITY LABEL TEMPLATES
+========================
+
+We introduce security label templates because it is difficult for
+users to ensure tagging of domains consistently and since there are
+--as we have seen in the case of isolation-- useful dependencies
+between the policies. Security Label Templates define type sets that
+can be addressed by more user-friendly label names,
+e.g. dom_HomeBanking describes a typical typeset tagged to domains
+used for sensitive Homebanking work-loads. Labels are defined in the
+file "policy-name"-security_label_template.xml.
+
+Using Security Label Templates has multiple advantages:
+a) easy reference of typical sets of type assignments
+b) consistent interpretation of type combinations
+c) meaningful application-level label names
+
+The definition of label templates depends on the combination of
+policies that are used. We will describe some of the labels defined
+for the Chinese Wall and Simple Type Enforcement combination.
+
+In the BoincClient example, the label_template file specifies that
+this Label is assigned the Chinese Wall type cw_Isolated. We do this
+assuming that this BoincClient is isolated against the rest of the
+system infrastructure (no persistent memory, no sharing with local
+domains). Since cw_Isolated is not included in any conflict set, it
+can run at any time concurrently with any other domain. The
+ste_DonatedCycles type assigned to the BoincClient reflects the
+isolation assumption: it is only assigned to the dom_NetworkDomain
+giving the BoincClient domain access to the network to communicate
+with its BoincServer.
+
+The strategy for combining types into Labels is the following: First
+we define a label for each type of general user domain
+(workload-oriented). Then we define a new label for each physical
+resource that shall be shared using a DD domain (e.g., disk) and for
+each logical resource offered through this physical resource (logical
+disk partition). We define then device domain labels (here:
+dom_SystemManagement, dom_StorageDomain, dom_NetworkDomain) which
+include the types of the physical resources (e.g. hda) their domains
+need to connect to. Such physical resources can only be accessed
+directly by device domains typed with the respective device's STE
+type. Additionally we assign to such a device domain Label the STE
+types of those user domains that are allowed to access one of the
+logical resources (e.g., hda1, hda2) built on top of this physical
+resource through the device domain.
+
+
+Label Construction Example:
+---------------------------
+
+We define here a storage domain label for a domain that owns a real
+disk drive and creates the logical disk partitions hda1 and hda2 which
+it serves to domains labeled dom_HomeBanking and dom_Fun
+respectively. The labels we refer to are defined in the label template
+file policies/chwall_ste/chwall_ste-security_label_template.xml.
+
+step1: To distinguish different shared disk drives, we create a
+separate Label and STE type for each of them. Here: we create a type
+ste_PersistentStorageA for disk drive hda. If you have another disk
+drive, you may define another persistent storage type
+ste_PersistentStorageB in the chwall_ste-security_policy.xml.
+
+step2: To distinguish different domains, we create multiple domain
+labels including different types. Here: label dom_HomeBanking includes
+STE type ste_PersonalFinances, label dom_Fun includes STE type
+ste_InternetInsecure.
+
+step3: The storage domain in charge of the hard drive A needs access
+to this hard drive. Therefore the storage domain label
+dom_StorageDomain must include the type assigned to the hard drive
+(ste_PersistentStorageA).
+
+step4: In order to serve hda1 to domains labeled dom_HomeBanking
+and hda2 to domains labeled dom_Fun, the storage domain label must
+include the types of those domains as well (ste_PersonalFinances,
+ste_InternetInsecure).
+
+step5: In order to keep the data for different types safely apart, the
+different logical disk partitions must be assigned unique labels and
+types, which are used inside the storage domain to extend the ACM
+access enforcement to logical resources served from inside the storage
+domain. We define labels "res_LogicalDiskPartition1 (hda1)" and assign
+it to hda1 and "res_LogicalDiskPartition2 (hda2)" and assign it to
+hda2. These labels must include the STE types of those domains that
+are allowed to use them (e.g., ste_PersonalFinances for hda1).
+
+The overall mandatory access control is then enforced in 3 different
+Xen components and these components use a single consistent policy to
+co-operatively enforce the policy. In the storage domain example, we
+have three components that co-operate:
+
+1. The ACM module inside the hypervisor enforces: communication between
+user domains and the storage domain (only domains including types
+ste_PersonalFinances or ste_InternetInsecure can communicate with the
+storage domain and request access to logical resource). This confines
+the sharing to the types assigned to the storage domain.
+
+2. The domain management will enforce (work in progress): assignment of
+real resources (hda) to domains (storage domain) that share a
+type with the resource.
+
+3. If the storage domain serves multiple STE types (as in our example),
+it enforces (work in progress): that domains can access (mount)
+logical resources only if they share an STE type with the respective
+resource. In our example, domains with the STE type
+ste_PersonalFinances can request access (mount) to logical resource
+hda1 from the storage domain.
+
+If you look at the virtual machine label dom_StorageDomain, you will
+see the minimal set of types assigned to our domain managing disk
+drive hda for serving logical disk partitions exclusively to
+dom_HomeBanking and dom_Fun.
+
+Similarly, network domains can confine access to the network or
+network communication between user domains.
+
+As a result, device domains (e.g., storage domain, network domain)
+must be simple and small to ensure their correct co-operation in the
+type enforcement model. If such trust is not possible, then hardware
+should be assigned exclusively to a single type (or to a single
+partition) in which case the hypervisor ACM enforcement enforces the
+types independently.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/readme.txt
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/readme.txt Thu Aug 25 22:53:20 2005
@@ -0,0 +1,29 @@
+
+##
+# readme.txt <description to the xen access control architecture>
+#
+# Author:
+# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx>
+#
+#
+# This file is a toc for information regarding
+# the access control policy and tools in Xen.
+##
+
+1. policy.txt:
+
+ describes the general reasoning and examples for access
+ control policies in Xen
+
+
+2. install.txt
+
+ describes the activation of the access control framework
+ in Xen
+
+3. example.txt
+
+ describes the available tools for managing security policies
+ in Xen and the tools to label domains
+
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/secpol_compat.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/secpol_compat.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,14 @@
+/* secpol_compat.h
+ * 'translates' data types necessary to
+ * include <xen/acm.h>
+ *
+ * Provides the short fixed-width integer names (u8 ... u64, s8 ... s64)
+ * as aliases of the corresponding <stdint.h> types.  secpol_xml2bin.c
+ * includes this header directly before <xen/acm.h>.
+ *
+ * The include guard keeps a second inclusion from re-declaring the
+ * typedefs, which is an error in C dialects older than C11.
+ */
+#ifndef SECPOL_COMPAT_H
+#define SECPOL_COMPAT_H
+
+#include <stdint.h>
+
+/* unsigned fixed-width integers */
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+/* signed fixed-width integers */
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+
+#endif /* SECPOL_COMPAT_H */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/secpol_xml2bin.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/secpol_xml2bin.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,1396 @@
+/****************************************************************
+ * secpol_xml2bin.c
+ *
+ * Copyright (C) 2005 IBM Corporation
+ *
+ * Author: Reiner Sailer <sailer@xxxxxxxxxx>
+ *
+ * Maintained:
+ * Reiner Sailer <sailer@xxxxxxxxxx>
+ * Ray Valdez <rvaldez@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * sHype policy translation tool. This tool takes an XML
+ * policy specification as input and produces a binary
+ * policy file that can be loaded into Xen through the
+ * ACM operations (secpol_tool loadpolicy) interface or at
+ * boot time (grub module parameter)
+ *
+ * indent -i4 -kr -nut
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <libgen.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <libxml/xmlschemas.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xmlreader.h>
+#include "secpol_compat.h"
+#include <xen/acm.h>
+
+#include "secpol_xml2bin.h"
+
+#define DEBUG 0
+
+/*
+ * primary / secondary policy component setting
+ *
+ * Both roles start out as NULLPOLICY.  'primary' can be set explicitly
+ * while the policy file is walked (set_component_type()); whatever is
+ * still unset afterwards is filled in by create_type_mapping().
+ */
+enum policycomponent { CHWALL, STE, NULLPOLICY }
+ primary = NULLPOLICY, secondary = NULLPOLICY;
+
+/*
+ * general list element for ste and chwall type queues
+ *
+ * One entry is appended per registered type (see add_entry(), called
+ * from register_type()).
+ */
+struct type_entry {
+ TAILQ_ENTRY(type_entry) entries; /* queue linkage */
+ char *name; /* name of type from xml file */
+ type_t mapping; /* type mapping into 16bit */
+};
+
+/* ste_head: queue of STE types; chwall_head: queue of Chinese Wall types */
+TAILQ_HEAD(tailhead, type_entry) ste_head, chwall_head;
+
+/*
+ * general list element for all label queues
+ *
+ * The same structure is reused for conflict sets, which are also just a
+ * set of types (see init_next_conflictset()).
+ */
+enum label_type { VM, RES, ANY };
+struct ssid_entry {
+ TAILQ_ENTRY(ssid_entry) entries;
+ char *name; /* label name */
+ enum label_type type; /* type: VM / RESOURCE LABEL */
+ u_int32_t num; /* ssid or referenced ssid */
+ int is_ref; /* if this entry references earlier ssid number */
+ unsigned char *row; /* index of types (if not a reference):
+ * one byte per type, row[i] != 0 means
+ * mapped type i is part of the set */
+};
+
+TAILQ_HEAD(tailhead_ssid, ssid_entry) ste_ssid_head, chwall_ssid_head,
+ conflictsets_head;
+/* entry most recently appended to the respective queue by the init_* helpers */
+struct ssid_entry *current_chwall_ssid_p = NULL;
+struct ssid_entry *current_ste_ssid_p = NULL;
+struct ssid_entry *current_conflictset_p = NULL;
+
+/* which label to assign to dom0 during boot */
+char *bootstrap_label;
+
+/*
+ * Running counters.  The *_ssids counters supply the 'num' of each newly
+ * created ssid entry; the *_labels counters are bumped once per entry
+ * appended to the corresponding ssid queue; max_conflictsets numbers the
+ * conflict sets.
+ * NOTE(review): in the visible code *_ssids and *_labels advance together;
+ * presumably they diverge for reference entries (is_ref) -- confirm.
+ */
+u_int32_t max_ste_ssids = 0;
+u_int32_t max_chwall_ssids = 0;
+u_int32_t max_chwall_labels = 0;
+u_int32_t max_ste_labels = 0;
+u_int32_t max_conflictsets = 0;
+
+char *current_ssid_name; /* store name until structure is allocated */
+char *current_conflictset_name; /* store name until structure is allocated */
+
+/*
+ * dynamic list of type mappings for STE:
+ * number of STE types registered so far; register_type() uses the current
+ * value as the mapping of the next type and sizes ssid rows with it
+ */
+u_int32_t max_ste_types = 0;
+
+/*
+ * dynamic list of type mappings for CHWALL:
+ * number of Chinese Wall types registered so far; also the row size of
+ * chwall ssids and conflict sets
+ */
+u_int32_t max_chwall_types = 0;
+
+/*
+ * dynamic list of conflict sets
+ * NOTE(review): not referenced in the part of the file reviewed here; the
+ * conflict set counter actually used is max_conflictsets -- confirm whether
+ * this variable is still needed.
+ */
+int max_conflict_set = 0;
+
+/* which policies are defined (set by walk_policy() when the element is seen) */
+int have_ste = 0;
+int have_chwall = 0;
+
+/* input/output file names */
+char *policy_filename = NULL,
+ *label_filename = NULL,
+ *binary_filename = NULL, *mapping_filename = NULL;
+
+/*
+ * Print the command synopsis for the program named 'prg' on stdout and
+ * terminate the process with EXIT_FAILURE.  Does not return.
+ */
+void usage(char *prg)
+{
+    fprintf(stdout,
+            "usage:\n%s policyname[-policy.xml/-security_label_template.xml]\n",
+            prg);
+    exit(EXIT_FAILURE);
+}
+
+
+/***************** policy-related parsing *********************/
+
+/*
+ * Reverse lookup: return the name of the type in queue 'head' whose
+ * numeric mapping equals 'mapping', or NULL if no entry matches.  The
+ * returned pointer is the string stored in the queue entry, not a copy.
+ */
+char *type_by_mapping(struct tailhead *head, u_int32_t mapping)
+{
+    struct type_entry *entry = head->tqh_first;
+
+    while (entry != NULL) {
+        if (entry->mapping == mapping)
+            return entry->name;
+        entry = entry->entries.tqe_next;
+    }
+    return NULL;
+}
+
+
+/*
+ * Find the entry of queue 'head' whose name is equal (strcmp) to 'name'.
+ * Returns the first matching entry, or NULL when the name is not present.
+ */
+struct type_entry *lookup(struct tailhead *head, char *name)
+{
+    struct type_entry *entry = head->tqh_first;
+
+    while (entry != NULL) {
+        if (strcmp(entry->name, name) == 0)
+            return entry;
+        entry = entry->entries.tqe_next;
+    }
+    return NULL;
+}
+
+/*
+ * Append a new type to queue 'head', refusing duplicates (every name may
+ * appear only once per queue).
+ *
+ * The 'name' pointer itself is stored in the new entry; it is not copied.
+ *
+ * Returns 0 on success, -EFAULT if 'name' is already in the queue (an
+ * error message is printed), or -ENOMEM if no entry could be allocated.
+ * Nothing is stored in the failure cases.
+ */
+int add_entry(struct tailhead *head, char *name, type_t mapping)
+{
+    struct type_entry *fresh;
+
+    if (lookup(head, name) != NULL) {
+        printf("Error: Type >%s< defined more than once.\n", name);
+        return -EFAULT;         /* already in the list */
+    }
+
+    fresh = malloc(sizeof(struct type_entry));
+    if (fresh == NULL)
+        return -ENOMEM;
+
+    fresh->name = name;
+    fresh->mapping = mapping;
+    TAILQ_INSERT_TAIL(head, fresh, entries);
+    return 0;
+}
+
+/*
+ * Translate an XML element name into its index in the token[] table.
+ * token[] is scanned up to its NULL terminator; it is not defined in this
+ * part of the file (presumably it comes from secpol_xml2bin.h).
+ *
+ * Returns the index of the first exact match, or -EFAULT if 'tok' is not
+ * in the table.
+ */
+int totoken(char *tok)
+{
+    int idx = 0;
+
+    while (token[idx] != NULL) {
+        if (strcmp(token[idx], tok) == 0)
+            return idx;
+        idx++;
+    }
+    return -EFAULT;
+}
+
+/* conflictsets use the same data structure as ssids; since
+ * they are similar in structure (set of types)
+ *
+ * Allocates a new, empty conflict set named current_conflictset_name,
+ * numbers it with max_conflictsets (which is then incremented), appends
+ * it to conflictsets_head and makes it the current conflict set.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails.  On failure
+ * nothing is leaked, no counter is advanced and the queue is unchanged.
+ */
+int init_next_conflictset(void)
+{
+    struct ssid_entry *conflictset;
+    unsigned char *row;
+
+    conflictset = malloc(sizeof(struct ssid_entry));
+    if (!conflictset)
+        return -ENOMEM;
+
+    /**
+     *  row: allocate one byte per type;
+     *  [i] != 0 --> mapped type >i< is part of the conflictset
+     */
+    row = malloc(max_chwall_types);
+    if (!row) {
+        free(conflictset);      /* do not leak the half-built entry */
+        return -ENOMEM;
+    }
+    memset(row, 0, max_chwall_types);
+
+    /* both allocations succeeded: fill in the entry and publish it */
+    conflictset->name = current_conflictset_name;
+    conflictset->num = max_conflictsets++;
+    conflictset->is_ref = 0;    /* n/a for conflictsets */
+    conflictset->type = ANY;    /* n/a for conflictsets; avoid garbage */
+    conflictset->row = row;
+
+    TAILQ_INSERT_TAIL(&conflictsets_head, conflictset, entries);
+    current_conflictset_p = conflictset;
+    return 0;
+}
+
+/*
+ * Handle one <Type> leaf of the policy file.
+ *
+ * The type name is the text content of 'cur_node'.  What happens with it
+ * depends on the parser state the node was found in:
+ *   - XML2BIN_stetype_S:    the name is appended to ste_head and gets the
+ *                           next STE mapping (max_ste_types).
+ *   - XML2BIN_chwalltype_S: likewise for chwall_head / max_chwall_types.
+ *   - XML2BIN_conflictsettype_S: the name must be a known Chinese Wall
+ *                           type; its mapping is marked in the row of the
+ *                           current conflict set.
+ *
+ * Returns 0 on success and -EFAULT on any error (unreadable name,
+ * duplicate type, unknown type, double entry, unexpected state); an error
+ * message is printed in each case.
+ *
+ * Ownership of the name string: on success in the two "type" states the
+ * string is kept by the queue entry; in every other path it is released
+ * here with xmlFree().
+ */
+int register_type(xmlNode * cur_node, xmlDocPtr doc, unsigned long state)
+{
+    xmlChar *text;
+    struct type_entry *e;
+
+    text = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+    if (!text) {
+        printf("Error reading type name!\n");
+        return -EFAULT;
+    }
+
+    switch (state) {
+    case XML2BIN_stetype_S:
+        if (add_entry(&ste_head, (char *) text, max_ste_types)) {
+            xmlFree(text);
+            return -EFAULT;
+        }
+        /* text is now referenced by the queue entry: do not free it */
+        max_ste_types++;
+        break;
+
+    case XML2BIN_chwalltype_S:
+        if (add_entry(&chwall_head, (char *) text, max_chwall_types)) {
+            xmlFree(text);
+            return -EFAULT;
+        }
+        /* text is now referenced by the queue entry: do not free it */
+        max_chwall_types++;
+        break;
+
+    case XML2BIN_conflictsettype_S:
+        /* a) search the type in the chwall_type list */
+        e = lookup(&chwall_head, (char *) text);
+        if (e == NULL) {
+            printf("CS type >%s< not a CHWALL type.\n", text);
+            xmlFree(text);
+            return -EFAULT;
+        }
+        /* a conflict set must have been opened before types are added */
+        if (current_conflictset_p == NULL) {
+            printf("ERROR: Type >%s< found outside of a conflict set.\n",
+                   text);
+            xmlFree(text);
+            return -EFAULT;
+        }
+        /* b) add type entry to the current cs set */
+        /* NOTE(review): the row was sized with max_chwall_types when the
+         * conflict set was created; this assumes all chwall types are
+         * registered before the first conflict set -- confirm (schema). */
+        if (current_conflictset_p->row[e->mapping]) {
+            printf("ERROR: Double entry of type >%s< in conflict set %d.\n",
+                   text, current_conflictset_p->num);
+            xmlFree(text);
+            return -EFAULT;
+        }
+        current_conflictset_p->row[e->mapping] = 1;
+        /* the name was only needed for the lookup; nothing references it */
+        xmlFree(text);
+        break;
+
+    default:
+        printf("Incorrect type environment (state = %lx, text = %s).\n",
+               state, text);
+        xmlFree(text);
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/*
+ * Evaluate the optional PRIMARY_COMPONENT_ATTR_NAME attribute of a policy
+ * component element.
+ *
+ * If the attribute is absent nothing happens.  If it is present it must
+ * equal PRIMARY_COMPONENT, and no primary component may have been chosen
+ * before; 'pc' then becomes the primary policy component.  Any violation
+ * prints an error and terminates the program with EXIT_FAILURE.
+ */
+void set_component_type(xmlNode * cur_node, enum policycomponent pc)
+{
+    xmlChar *order;
+
+    order = xmlGetProp(cur_node,
+                       (xmlChar *) PRIMARY_COMPONENT_ATTR_NAME);
+    if (order == NULL)
+        return;                 /* attribute not given: keep defaults */
+
+    if (strcmp((char *) order, PRIMARY_COMPONENT) != 0) {
+        printf("ERROR: Illegal attribut value >order=%s<.\n",
+               (char *) order);
+        xmlFree(order);
+        exit(EXIT_FAILURE);
+    }
+
+    if (primary != NULLPOLICY) {
+        printf("ERROR: Primary Policy Component set twice!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    primary = pc;
+    xmlFree(order);
+}
+
+/*
+ * Recursively walk the policy XML tree, starting at 'start' and visiting
+ * all of its following siblings.
+ *
+ * 'state' is a bit set describing the enclosing elements: when the walk
+ * descends into an element with token index 'code', bit 'code' is added
+ * to the state passed to the children.  <Type> leaves are handed to
+ * register_type() together with the current state.
+ *
+ * Side effects: sets have_ste / have_chwall, may set the primary policy
+ * component (set_component_type()), creates a conflict set per conflict
+ * set element (init_next_conflictset()) and fills the type queues.
+ *
+ * Any error (unknown element, version mismatch, allocation or
+ * registration failure) prints a message and terminates the program with
+ * EXIT_FAILURE.
+ */
+void walk_policy(xmlNode * start, xmlDocPtr doc, unsigned long state)
+{
+    xmlNode *cur_node = NULL;
+    int code;
+
+    for (cur_node = start; cur_node; cur_node = cur_node->next) {
+        if ((code = totoken((char *) cur_node->name)) < 0) {
+            printf("Unknown token: >%s<. Aborting.\n", cur_node->name);
+            exit(EXIT_FAILURE);
+        }
+        /*
+         * The state bit is built with an unsigned long constant so the
+         * shift is done in the width of 'state' rather than in int.
+         */
+        switch (code) {         /* adjust state to new state */
+        case XML2BIN_SECPOL:
+        case XML2BIN_STETYPES:
+        case XML2BIN_CHWALLTYPES:
+        case XML2BIN_CONFLICTSETS:
+            walk_policy(cur_node->children, doc, state | (1UL << code));
+            break;
+
+        case XML2BIN_STE:
+            if (WRITTEN_AGAINST_ACM_STE_VERSION != ACM_STE_VERSION) {
+                printf("ERROR: This program was written against another STE "
+                       "version.\n");
+                exit(EXIT_FAILURE);
+            }
+            have_ste = 1;
+            set_component_type(cur_node, STE);
+            walk_policy(cur_node->children, doc, state | (1UL << code));
+            break;
+
+        case XML2BIN_CHWALL:
+            if (WRITTEN_AGAINST_ACM_CHWALL_VERSION != ACM_CHWALL_VERSION) {
+                printf("ERROR: This program was written against another CHWALL "
+                       "version.\n");
+                exit(EXIT_FAILURE);
+            }
+            have_chwall = 1;
+            set_component_type(cur_node, CHWALL);
+            walk_policy(cur_node->children, doc, state | (1UL << code));
+            break;
+
+        case XML2BIN_CSTYPE:
+            /* the name is parked in a global until the entry exists */
+            current_conflictset_name =
+                (char *) xmlGetProp(cur_node, (xmlChar *) "name");
+            if (!current_conflictset_name)
+                current_conflictset_name = "";
+
+            if (init_next_conflictset()) {
+                printf
+                    ("ERROR: creating new conflictset structure failed.\n");
+                exit(EXIT_FAILURE);
+            }
+            walk_policy(cur_node->children, doc, state | (1UL << code));
+            break;
+
+        case XML2BIN_TYPE:
+            if (register_type(cur_node, doc, state))
+                exit(EXIT_FAILURE);
+            /* type leaf */
+            break;
+
+        case XML2BIN_TEXT:
+        case XML2BIN_COMMENT:
+        case XML2BIN_POLICYHEADER:
+            /* leaf - nothing to do */
+            break;
+
+        default:
+            printf("Unkonwn token Error (%d)\n", code);
+            exit(EXIT_FAILURE);
+        }
+
+    }
+    return;
+}
+
+/*
+ * Build the type queues and conflict sets from the parsed policy document
+ * and decide which policy component is primary and which is secondary.
+ *
+ * The three queues (ste_head, chwall_head, conflictsets_head) are
+ * (re)initialised, then the whole document is walked.  If the policy did
+ * not name a primary component, Chinese Wall is preferred over STE.  The
+ * secondary component is the other one, if it is defined at all.
+ *
+ * With DEBUG enabled the resulting queues are dumped to stdout.
+ * Always returns 0; errors during the walk terminate the program.
+ */
+int create_type_mapping(xmlDocPtr doc)
+{
+    xmlNode *root = xmlDocGetRootElement(doc);
+    struct type_entry *type_it;
+    struct ssid_entry *cs_it;
+    int idx;
+
+    printf("Creating ssid mappings ...\n");
+
+    /* initialize the ste and chwall type lists */
+    TAILQ_INIT(&ste_head);
+    TAILQ_INIT(&chwall_head);
+    TAILQ_INIT(&conflictsets_head);
+
+    walk_policy(root, doc, XML2BIN_NULL);
+
+    /* determine primary/secondary policy component orders */
+    if (primary == NULLPOLICY) {
+        if (have_chwall)
+            primary = CHWALL;   /* default if not set */
+        else if (have_ste)
+            primary = STE;
+    }
+
+    /* the secondary stays NULLPOLICY unless the other component exists */
+    if (primary == CHWALL && have_ste)
+        secondary = STE;
+    else if (primary == STE && have_chwall)
+        secondary = CHWALL;
+
+    if (!DEBUG)
+        return 0;
+
+    /* print queues */
+    if (have_ste) {
+        printf("STE-Type queue (%s):\n",
+               (primary == STE) ? "PRIMARY" : "SECONDARY");
+        type_it = ste_head.tqh_first;
+        while (type_it != NULL) {
+            printf("name=%22s, map=%x\n", type_it->name, type_it->mapping);
+            type_it = type_it->entries.tqe_next;
+        }
+    }
+    if (have_chwall) {
+        printf("CHWALL-Type queue (%s):\n",
+               (primary == CHWALL) ? "PRIMARY" : "SECONDARY");
+        type_it = chwall_head.tqh_first;
+        while (type_it != NULL) {
+            printf("name=%s, map=%x\n", type_it->name, type_it->mapping);
+            type_it = type_it->entries.tqe_next;
+        }
+
+        printf("Conflictset queue (max=%d):\n", max_conflictsets);
+        cs_it = conflictsets_head.tqh_first;
+        while (cs_it != NULL) {
+            printf("conflictset name >%s<\n",
+                   cs_it->name ? cs_it->name : "NONAME");
+            /* list the mappings of all types contained in this set */
+            for (idx = 0; idx < max_chwall_types; idx++)
+                if (cs_it->row[idx])
+                    printf("#%x ", idx);
+            printf("\n");
+            cs_it = cs_it->entries.tqe_next;
+        }
+    }
+    return 0;
+}
+
+
+/***************** template-related parsing *********************/
+
+/* add default ssid at head of ssid queues
+ *
+ * Creates the "DEFAULT" ssid (type ANY, no types set) for both the
+ * Chinese Wall and the STE ssid queue, appends each to its queue and
+ * makes it the current ssid.  The ssid and label counters of both
+ * policies are advanced by one.
+ *
+ * NOTE(review): the queues are not initialised here; this assumes
+ * TAILQ_INIT() was already called on chwall_ssid_head and ste_ssid_head
+ * by the caller -- confirm.
+ *
+ * Returns 0 on success or -ENOMEM if any allocation fails.  On failure
+ * everything allocated so far is released and neither the queues nor the
+ * counters are modified.
+ */
+int init_ssid_queues(void)
+{
+    struct ssid_entry *default_ssid_chwall, *default_ssid_ste;
+    unsigned char *chwall_row, *ste_row;
+
+    /* allocate everything up front so failure needs a single cleanup */
+    default_ssid_chwall = malloc(sizeof(struct ssid_entry));
+    default_ssid_ste = malloc(sizeof(struct ssid_entry));
+    chwall_row = malloc(max_chwall_types);
+    ste_row = malloc(max_ste_types);
+
+    if ((!default_ssid_chwall) || (!default_ssid_ste) ||
+        (!chwall_row) || (!ste_row)) {
+        /* free(NULL) is a no-op, so no per-pointer checks are needed */
+        free(default_ssid_chwall);
+        free(default_ssid_ste);
+        free(chwall_row);
+        free(ste_row);
+        return -ENOMEM;
+    }
+
+    /* default chwall ssid */
+    memset(chwall_row, 0, max_chwall_types);
+    default_ssid_chwall->name = "DEFAULT";
+    default_ssid_chwall->num = max_chwall_ssids++;
+    default_ssid_chwall->is_ref = 0;
+    default_ssid_chwall->type = ANY;
+    default_ssid_chwall->row = chwall_row;
+
+    TAILQ_INSERT_TAIL(&chwall_ssid_head, default_ssid_chwall, entries);
+    current_chwall_ssid_p = default_ssid_chwall;
+    max_chwall_labels++;
+
+    /* default ste ssid */
+    memset(ste_row, 0, max_ste_types);
+    default_ssid_ste->name = "DEFAULT";
+    default_ssid_ste->num = max_ste_ssids++;
+    default_ssid_ste->is_ref = 0;
+    default_ssid_ste->type = ANY;
+    default_ssid_ste->row = ste_row;
+
+    TAILQ_INSERT_TAIL(&ste_ssid_head, default_ssid_ste, entries);
+    current_ste_ssid_p = default_ssid_ste;
+    max_ste_labels++;
+    return 0;
+}
+
+/*
+ * Append a new, empty chwall ssid for the label currently named by
+ * current_ssid_name and make it the current chwall ssid.
+ *
+ * state: parser state bit mask; if the XML2BIN_VM bit is set the ssid is
+ *        typed VM, otherwise RES.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails (in which case no
+ * counter is changed and nothing is leaked).
+ */
+int init_next_chwall_ssid(unsigned long state)
+{
+    struct ssid_entry *ssid = malloc(sizeof(struct ssid_entry));
+
+    if (!ssid)
+        return -ENOMEM;
+
+    /**
+     * row: allocate one byte per type;
+     * [i] != 0 --> mapped type >i< is part of the ssid
+     */
+    ssid->row = malloc(max_chwall_types);
+    if (!ssid->row)
+    {
+        /* do not leak the entry itself when its row cannot be allocated */
+        free(ssid);
+        return -ENOMEM;
+    }
+    memset(ssid->row, 0, max_chwall_types);
+
+    /* only consume an ssid number once all allocations have succeeded */
+    ssid->name = current_ssid_name;
+    ssid->num = max_chwall_ssids++;
+    ssid->is_ref = 0;
+
+    if (state & (1 << XML2BIN_VM))
+        ssid->type = VM;
+    else
+        ssid->type = RES;
+
+    TAILQ_INSERT_TAIL(&chwall_ssid_head, ssid, entries);
+    current_chwall_ssid_p = ssid;
+    max_chwall_labels++;
+    return 0;
+}
+
+/*
+ * Append a new, empty ste ssid for the label currently named by
+ * current_ssid_name and make it the current ste ssid.
+ *
+ * state: parser state bit mask; if the XML2BIN_VM bit is set the ssid is
+ *        typed VM, otherwise RES.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails (in which case no
+ * counter is changed and nothing is leaked).
+ */
+int init_next_ste_ssid(unsigned long state)
+{
+    struct ssid_entry *ssid = malloc(sizeof(struct ssid_entry));
+
+    if (!ssid)
+        return -ENOMEM;
+
+    /**
+     * row: allocate one byte per type;
+     * [i] != 0 --> mapped type >i< is part of the ssid
+     */
+    ssid->row = malloc(max_ste_types);
+    if (!ssid->row)
+    {
+        /* do not leak the entry itself when its row cannot be allocated */
+        free(ssid);
+        return -ENOMEM;
+    }
+    memset(ssid->row, 0, max_ste_types);
+
+    /* only consume an ssid number once all allocations have succeeded */
+    ssid->name = current_ssid_name;
+    ssid->num = max_ste_ssids++;
+    ssid->is_ref = 0;
+
+    if (state & (1 << XML2BIN_VM))
+        ssid->type = VM;
+    else
+        ssid->type = RES;
+
+    TAILQ_INSERT_TAIL(&ste_ssid_head, ssid, entries);
+    current_ste_ssid_p = ssid;
+    max_ste_labels++;
+
+    return 0;
+}
+
+
+/*
+ * Adds a type to the current ssid.
+ *
+ * The text content of cur_node is taken as the type name, looked up in the
+ * ste or chwall type queue (selected by the parser state) and the
+ * resulting mapping index is marked in the row of the current ssid.
+ *
+ * Returns 0 on success and -EFAULT if the name cannot be read or the
+ * parser state is not one in which a type is expected.  An unknown type
+ * name terminates the program.
+ */
+int add_type(xmlNode * cur_node, xmlDocPtr doc, unsigned long state)
+{
+    xmlChar *text;
+    struct type_entry *e;
+
+    text = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+    if (!text)
+    {
+        printf("Error reading type name!\n");
+        return -EFAULT;
+    }
+    /* same for all: 1. lookup type mapping, 2. mark type in ssid */
+    switch (state) {
+    case XML2BIN_VM_STE_S:
+    case XML2BIN_RES_STE_S:
+        /* lookup the type mapping and include the type mapping into the array */
+        if (!(e = lookup(&ste_head, (char *) text)))
+        {
+            printf("ERROR: unknown VM STE type >%s<.\n", text);
+            exit(EXIT_FAILURE);
+        }
+        if (current_ste_ssid_p->row[e->mapping])
+            printf("Warning: double entry of VM STE type >%s<.\n", text);
+
+        current_ste_ssid_p->row[e->mapping] = 1;
+        break;
+
+    case XML2BIN_VM_CHWALL_S:
+        /* lookup the type mapping and include the type mapping into the array */
+        if (!(e = lookup(&chwall_head, (char *) text)))
+        {
+            printf("ERROR: unknown VM CHWALL type >%s<.\n", text);
+            exit(EXIT_FAILURE);
+        }
+        if (current_chwall_ssid_p->row[e->mapping])
+            printf("Warning: double entry of VM CHWALL type >%s<.\n",
+                   text);
+
+        current_chwall_ssid_p->row[e->mapping] = 1;
+        break;
+
+    default:
+        printf("Incorrect type environment (state = %lx, text = %s).\n",
+               state, text);
+        xmlFree(text);
+        return -EFAULT;
+    }
+
+    /* xmlNodeListGetString() hands out a copy owned by the caller; only the
+     * mapping index was stored above, so release the name here as the
+     * error path already does.
+     * NOTE(review): assumes lookup() does not keep the pointer - confirm */
+    xmlFree(text);
+    return 0;
+}
+
+/*
+ * Read the bootstrap label attribute of cur_node and remember it in the
+ * global bootstrap_label; terminates the program if the attribute is
+ * missing.
+ */
+void set_bootstrap_label(xmlNode * cur_node)
+{
+    xmlChar *label =
+        xmlGetProp(cur_node, (xmlChar *) BOOTSTRAP_LABEL_ATTR_NAME);
+
+    if (label == NULL)
+    {
+        printf("ERROR: No bootstrap label defined!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    bootstrap_label = (char *) label;
+}
+
+/*
+ * Recursively walk the sibling list starting at 'start' of the label
+ * template document and build the ssid queues.
+ *
+ * 'state' is a bit mask of the element tokens enclosing the current node
+ * (bit <token> set for every ancestor that was descended into).  Any
+ * structural error terminates the program.
+ */
+void walk_labels(xmlNode * start, xmlDocPtr doc, unsigned long state)
+{
+    xmlNode *node = start;
+    int tok;
+
+    while (node != NULL)
+    {
+        tok = totoken((char *) node->name);
+        if (tok < 0)
+        {
+            printf("Unkonwn token: >%s<. Aborting.\n", node->name);
+            exit(EXIT_FAILURE);
+        }
+
+        /* adjust state to new state */
+        switch (tok) {
+
+        case XML2BIN_SUBJECTS:
+            set_bootstrap_label(node);
+            /* fall through */
+        case XML2BIN_VM:
+        case XML2BIN_RES:
+        case XML2BIN_SECTEMPLATE:
+        case XML2BIN_OBJECTS:
+            /* pure container elements: descend with the token recorded */
+            walk_labels(node->children, doc, state | (1 << tok));
+            break;
+
+        case XML2BIN_STETYPES:
+            /* create new ssid entry to use and point current to it */
+            if (init_next_ste_ssid(state) != 0)
+            {
+                printf("ERROR: creating new ste ssid structure failed.\n");
+                exit(EXIT_FAILURE);
+            }
+            walk_labels(node->children, doc, state | (1 << tok));
+            break;
+
+        case XML2BIN_CHWALLTYPES:
+            /* create new ssid entry to use and point current to it */
+            if (init_next_chwall_ssid(state) != 0)
+            {
+                printf("ERROR: creating new chwall ssid structure failed.\n");
+                exit(EXIT_FAILURE);
+            }
+            walk_labels(node->children, doc, state | (1 << tok));
+            break;
+
+        case XML2BIN_TYPE:
+            /* add type to current ssid */
+            if (add_type(node, doc, state) != 0)
+                exit(EXIT_FAILURE);
+            break;
+
+        case XML2BIN_NAME:
+            /* a name is only meaningful directly inside a VM or RES label */
+            if (!((state == XML2BIN_VM_S) || (state == XML2BIN_RES_S)))
+            {
+                printf("ERROR: >name< out of VM/RES context.\n");
+                exit(EXIT_FAILURE);
+            }
+            current_ssid_name = (char *)
+                xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
+
+            if (current_ssid_name == NULL)
+            {
+                printf("ERROR: empty >name<!\n");
+                exit(EXIT_FAILURE);
+            }
+            break;
+
+        case XML2BIN_TEXT:
+        case XML2BIN_COMMENT:
+        case XML2BIN_LABELHEADER:
+            /* nothing to collect from these */
+            break;
+
+        default:
+            printf("Unkonwn token Error (%d)\n", tok);
+            exit(EXIT_FAILURE);
+        }
+
+        node = node->next;
+    }
+}
+
+/* This function walks through a ssid queue
+ * and transforms double entries into references
+ * to the first definition (we need to keep the
+ * entry to map labels but we don't want double
+ * ssids in the binary policy).
+ *
+ * head:      ssid queue to process
+ * max_types: number of bytes of each entry's row that are compared
+ * max_ssids: in/out; on return the number of entries that are not
+ *            references (it is reset and recounted in the second pass)
+ */
+void
+remove_doubles(struct tailhead_ssid *head,
+ u_int32_t max_types, u_int32_t * max_ssids)
+{
+ struct ssid_entry *np, *ni;
+
+ /* walk once through the list */
+ for (np = head->tqh_first; np != NULL; np = np->entries.tqe_next)
+ {
+ /* now search from the start until np for the same entry */
+ for (ni = head->tqh_first; ni != np; ni = ni->entries.tqe_next)
+ {
+ /* references carry no definition of their own; skip them */
+ if (ni->is_ref)
+ continue;
+ /* memcmp != 0: the rows differ, so this is not a double */
+ if (memcmp(np->row, ni->row, max_types))
+ continue;
+ /* found one, set np reference to ni */
+ np->is_ref = 1;
+ np->num = ni->num;
+ (*max_ssids)--;
+ }
+ }
+
+ /* now minimize the ssid numbers used (doubles introduce holes) */
+ (*max_ssids) = 0; /* reset */
+
+ for (np = head->tqh_first; np != NULL; np = np->entries.tqe_next)
+ {
+ /* only real definitions get a number of their own */
+ if (np->is_ref)
+ continue;
+
+ if (np->num != (*max_ssids)) {
+ /* first reset all later references to the new max_ssid */
+ for (ni = np->entries.tqe_next; ni != NULL; ni =
ni->entries.tqe_next)
+ {
+ if (ni->num == np->num)
+ ni->num = (*max_ssids);
+ }
+ /* now reset num */
+ np->num = (*max_ssids)++;
+ }
+ else
+ (*max_ssids)++;
+ }
+}
+
+/*
+ * Move the ssid whose name equals the global bootstrap_label directly
+ * behind the first (default) entry of the queue and renumber all entries
+ * in list order, so that the bootstrap label receives ssidref 1.
+ *
+ * max_types and max_ssids are not used; they are kept so that the
+ * signature stays parallel to remove_doubles().
+ *
+ * Terminates the program if no bootstrap label is set or if it is not
+ * found in the queue.
+ *
+ * will go away as soon as we have non-static bootstrap ssidref for dom0
+ */
+void fixup_bootstrap_label(struct tailhead_ssid *head,
+                           u_int32_t max_types, u_int32_t * max_ssids)
+{
+    struct ssid_entry *np;
+    int i;
+
+    /* should not happen if xml / xsd checks work */
+    if (!bootstrap_label)
+    {
+        printf("ERROR: No bootstrap label defined.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* search bootstrap_label (entries without a name can never match) */
+    for (np = head->tqh_first; np != NULL; np = np->entries.tqe_next)
+    {
+        if (np->name && !strcmp(np->name, bootstrap_label))
+            break;
+    }
+
+    if (!np) {
+        /* bootstrap label not found */
+        printf("ERROR: Bootstrap label >%s< not found.\n", bootstrap_label);
+        exit(EXIT_FAILURE);
+    }
+
+    /* move this entry ahead in the list right after the default entry so it
+     * receives ssidref 1/1.  If the match is the first entry itself there is
+     * nothing to move: removing it and re-inserting it "after the first
+     * entry" would displace the default ssid, or dereference NULL when it is
+     * the only entry of the queue. */
+    if (np != head->tqh_first)
+    {
+        TAILQ_REMOVE(head, np, entries);
+        TAILQ_INSERT_AFTER(head, head->tqh_first, np, entries);
+    }
+
+    /* renumber the ssids (we could also just switch places with 1st element) */
+    for (np = head->tqh_first, i = 0; np != NULL;
+         np = np->entries.tqe_next, i++)
+        np->num = i;
+}
+
+/*
+ * Build the chwall and ste ssid queues from the label template document.
+ *
+ * Steps: initialize both queues with their default ssid, walk the DOM tree
+ * to collect one ssid per label, move the bootstrap label to position 1
+ * and finally turn duplicate ssids into references.  With DEBUG set the
+ * resulting queues are printed.  Always returns 0; errors terminate the
+ * program.
+ */
+int create_ssid_mapping(xmlDocPtr doc)
+{
+    xmlNode *root_element = xmlDocGetRootElement(doc);
+    struct ssid_entry *ssid;
+    int t;
+
+    printf("Creating label mappings ...\n");
+
+    /* start from empty queues and put the default ssids in first */
+    TAILQ_INIT(&chwall_ssid_head);
+    TAILQ_INIT(&ste_ssid_head);
+
+    if (init_ssid_queues() != 0)
+    {
+        printf("ERROR adding default ssids.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* now walk the template DOM tree and fill in ssids */
+    walk_labels(root_element, doc, XML2BIN_NULL);
+
+    /*
+     * now sort bootstrap label to the head of the list
+     * (for now), dom0 assumes its label in the first
+     * defined ssidref (1/1). 0/0 is the default non-Label
+     */
+    if (have_chwall)
+    {
+        fixup_bootstrap_label(&chwall_ssid_head, max_chwall_types,
+                              &max_chwall_ssids);
+    }
+    if (have_ste)
+    {
+        fixup_bootstrap_label(&ste_ssid_head, max_ste_types,
+                              &max_ste_ssids);
+    }
+
+    /* remove any double entries (insert reference instead) */
+    if (have_chwall)
+    {
+        remove_doubles(&chwall_ssid_head, max_chwall_types,
+                       &max_chwall_ssids);
+    }
+    if (have_ste)
+    {
+        remove_doubles(&ste_ssid_head, max_ste_types,
+                       &max_ste_ssids);
+    }
+
+    /* everything below is debug output only */
+    if (!DEBUG)
+        return 0;
+
+    if (have_chwall)
+    {
+        printf("CHWALL SSID queue (max ssidrefs=%d):\n", max_chwall_ssids);
+        ssid = chwall_ssid_head.tqh_first;
+        while (ssid != NULL)
+        {
+            printf("SSID #%02u (Label=%s)\n", ssid->num, ssid->name);
+            if (ssid->is_ref)
+            {
+                printf("REFERENCE");
+            }
+            else
+            {
+                for (t = 0; t < max_chwall_types; t++)
+                {
+                    if (ssid->row[t])
+                        printf("#%02d ", t);
+                }
+            }
+            printf("\n\n");
+            ssid = ssid->entries.tqe_next;
+        }
+    }
+
+    if (have_ste)
+    {
+        printf("STE SSID queue (max ssidrefs=%d):\n", max_ste_ssids);
+        ssid = ste_ssid_head.tqh_first;
+        while (ssid != NULL)
+        {
+            printf("SSID #%02u (Label=%s)\n", ssid->num, ssid->name);
+            if (ssid->is_ref)
+            {
+                printf("REFERENCE");
+            }
+            else
+            {
+                for (t = 0; t < max_ste_types; t++)
+                {
+                    if (ssid->row[t])
+                        printf("#%02d ", t);
+                }
+            }
+            printf("\n\n");
+            ssid = ssid->entries.tqe_next;
+        }
+    }
+    return 0;
+}
+
+/***************** writing the binary policy *********************/
+
+/*
+ * the mapping file is ascii-based since it will likely be used from
+ * within scripts (using awk, grep, etc.);
+ *
+ * We print from high-level to low-level information so that with one
+ * pass, any symbol can be resolved (e.g. Label -> types)
+ *
+ * Sections written, in order: header (magic, file names, maxima),
+ * primary/secondary policy, label->ssid, ssid->type and type mappings.
+ * All numbers are written in hexadecimal without a "0x" prefix.
+ *
+ * Returns 0 on success and -EIO if the file cannot be opened or if
+ * writing or closing it fails.
+ */
+int write_mapping(char *filename)
+{
+
+    struct ssid_entry *e;
+    struct type_entry *t;
+    int i;
+    int ret = 0;
+    FILE *file;
+
+    if ((file = fopen(filename, "w")) == NULL)
+        return -EIO;
+
+    fprintf(file, "MAGIC %08x\n", ACM_MAGIC);
+    fprintf(file, "POLICY %s\n",
+            basename(policy_filename));
+    fprintf(file, "BINARY %s\n",
+            basename(binary_filename));
+    if (have_chwall)
+    {
+        fprintf(file, "MAX-CHWALL-TYPES %08x\n", max_chwall_types);
+        fprintf(file, "MAX-CHWALL-SSIDS %08x\n", max_chwall_ssids);
+        fprintf(file, "MAX-CHWALL-LABELS %08x\n", max_chwall_labels);
+    }
+    if (have_ste)
+    {
+        fprintf(file, "MAX-STE-TYPES %08x\n", max_ste_types);
+        fprintf(file, "MAX-STE-SSIDS %08x\n", max_ste_ssids);
+        fprintf(file, "MAX-STE-LABELS %08x\n", max_ste_labels);
+    }
+    fprintf(file, "\n");
+
+    /* primary / secondary order for combined ssid synthesis/analysis
+     * if no primary is named, then chwall is primary */
+    switch (primary) {
+    case CHWALL:
+        fprintf(file, "PRIMARY CHWALL\n");
+        break;
+
+    case STE:
+        fprintf(file, "PRIMARY STE\n");
+        break;
+
+    default:
+        fprintf(file, "PRIMARY NULL\n");
+        break;
+    }
+
+    switch (secondary) {
+    case CHWALL:
+        fprintf(file, "SECONDARY CHWALL\n");
+        break;
+
+    case STE:
+        fprintf(file, "SECONDARY STE\n");
+        break;
+
+    default:
+        fprintf(file, "SECONDARY NULL\n");
+        break;
+    }
+    fprintf(file, "\n");
+
+    /* first labels to ssid mappings (references included: every label
+     * must be resolvable to an ssid number) */
+    if (have_chwall)
+    {
+        for (e = chwall_ssid_head.tqh_first; e != NULL;
+             e = e->entries.tqe_next)
+        {
+            fprintf(file, "LABEL->SSID %s CHWALL %-25s %8x\n",
+                    (e->type ==
+                     VM) ? "VM " : ((e->type == RES) ? "RES" : "ANY"),
+                    e->name, e->num);
+        }
+        fprintf(file, "\n");
+    }
+    if (have_ste)
+    {
+        for (e = ste_ssid_head.tqh_first; e != NULL;
+             e = e->entries.tqe_next)
+        {
+            fprintf(file, "LABEL->SSID %s STE %-25s %8x\n",
+                    (e->type ==
+                     VM) ? "VM " : ((e->type == RES) ? "RES" : "ANY"),
+                    e->name, e->num);
+        }
+        fprintf(file, "\n");
+    }
+
+    /* second ssid to type mappings (references are skipped: their ssid
+     * number is already described by the entry they refer to) */
+    if (have_chwall)
+    {
+        for (e = chwall_ssid_head.tqh_first; e != NULL;
+             e = e->entries.tqe_next)
+        {
+            if (e->is_ref)
+                continue;
+
+            fprintf(file, "SSID->TYPE CHWALL %08x", e->num);
+
+            for (i = 0; i < max_chwall_types; i++)
+                if (e->row[i])
+                    fprintf(file, " %s", type_by_mapping(&chwall_head, i));
+
+            fprintf(file, "\n");
+        }
+        fprintf(file, "\n");
+    }
+    if (have_ste) {
+        for (e = ste_ssid_head.tqh_first; e != NULL;
+             e = e->entries.tqe_next)
+        {
+            if (e->is_ref)
+                continue;
+
+            fprintf(file, "SSID->TYPE STE %08x", e->num);
+
+            for (i = 0; i < max_ste_types; i++)
+                if (e->row[i])
+                    fprintf(file, " %s", type_by_mapping(&ste_head, i));
+
+            fprintf(file, "\n");
+        }
+        fprintf(file, "\n");
+    }
+    /* third type mappings */
+    if (have_chwall)
+    {
+        for (t = chwall_head.tqh_first; t != NULL; t = t->entries.tqe_next)
+        {
+            fprintf(file, "TYPE CHWALL %-25s %8x\n",
+                    t->name, t->mapping);
+        }
+        fprintf(file, "\n");
+    }
+    if (have_ste) {
+        for (t = ste_head.tqh_first; t != NULL; t = t->entries.tqe_next)
+        {
+            fprintf(file, "TYPE STE %-25s %8x\n",
+                    t->name, t->mapping);
+        }
+        fprintf(file, "\n");
+    }
+
+    /* a failed fprintf leaves the stream error flag set, and buffered data
+     * can still fail to reach the disk on close; report both instead of
+     * claiming success for a truncated map file */
+    if (ferror(file))
+        ret = -EIO;
+    if (fclose(file) != 0)
+        ret = -EIO;
+    return ret;
+}
+
+/*
+ * Serialize the Chinese Wall policy component into a freshly allocated
+ * buffer: header, then one row of type_t per non-reference ssid, then one
+ * row per conflict set.  Every value is stored in network byte order.
+ *
+ * len_chwall: out; receives the buffer length in bytes.
+ *
+ * Returns the buffer, or NULL (leaving *len_chwall untouched) if there is
+ * no chwall component.  Allocation failure or a length mismatch
+ * terminates the program.
+ */
+unsigned char *write_chwall_binary(u_int32_t * len_chwall)
+{
+    struct acm_chwall_policy_buffer *hdr;
+    unsigned char *buf, *cursor;
+    struct ssid_entry *entry;
+    u_int32_t total;
+    int t;
+
+    if (!have_chwall)
+        return NULL;
+
+    total = sizeof(struct acm_chwall_policy_buffer) +
+        sizeof(type_t) * max_chwall_types * max_chwall_ssids +
+        sizeof(type_t) * max_chwall_types * max_conflictsets;
+
+    buf = malloc(total);
+    if (buf == NULL)
+    {
+        printf("ERROR: out of memory allocating chwall buffer.\n");
+        exit(EXIT_FAILURE);
+    }
+    cursor = buf;
+
+    /* chwall has 3 parts : header, types, conflictsets */
+
+    hdr = (struct acm_chwall_policy_buffer *) buf;
+    hdr->policy_code = htonl(ACM_CHINESE_WALL_POLICY);
+    hdr->policy_version = htonl(ACM_CHWALL_VERSION);
+    hdr->chwall_max_types = htonl(max_chwall_types);
+    hdr->chwall_max_ssidrefs = htonl(max_chwall_ssids);
+    hdr->chwall_max_conflictsets = htonl(max_conflictsets);
+    hdr->chwall_ssid_offset =
+        htonl(sizeof(struct acm_chwall_policy_buffer));
+    /* the conflict sets follow directly behind the ssid rows */
+    hdr->chwall_conflict_sets_offset =
+        htonl(ntohl(hdr->chwall_ssid_offset) +
+              sizeof(domaintype_t) * max_chwall_ssids * max_chwall_types);
+    /* not set, only retrieved */
+    hdr->chwall_running_types_offset = 0;
+    /* not set, only retrieved */
+    hdr->chwall_conflict_aggregate_offset = 0;
+    cursor += sizeof(struct acm_chwall_policy_buffer);
+
+    /* types: references share the row of the ssid they point to */
+    for (entry = chwall_ssid_head.tqh_first; entry != NULL;
+         entry = entry->entries.tqe_next)
+    {
+        if (!entry->is_ref)
+        {
+            type_t *out = (type_t *) cursor;
+
+            for (t = 0; t < max_chwall_types; t++)
+                out[t] = htons((type_t) entry->row[t]);
+
+            cursor += sizeof(type_t) * max_chwall_types;
+        }
+    }
+
+    /* conflictsets */
+    for (entry = conflictsets_head.tqh_first; entry != NULL;
+         entry = entry->entries.tqe_next)
+    {
+        type_t *out = (type_t *) cursor;
+
+        for (t = 0; t < max_chwall_types; t++)
+            out[t] = htons((type_t) entry->row[t]);
+
+        cursor += sizeof(type_t) * max_chwall_types;
+    }
+
+    /* sanity check: exactly the computed number of bytes must be filled */
+    if ((cursor - buf) != total)
+    {
+        printf("ERROR: wrong lengths in %s.\n", __func__);
+        exit(EXIT_FAILURE);
+    }
+
+    (*len_chwall) = total;
+    return buf;
+}
+
+/*
+ * Serialize the Simple Type Enforcement policy component into a freshly
+ * allocated buffer: header, then one row of type_t per non-reference
+ * ssid.  Every value is stored in network byte order.
+ *
+ * len_ste: out; receives the buffer length in bytes.
+ *
+ * Returns the buffer, or NULL (leaving *len_ste untouched) if there is no
+ * ste component.  Allocation failure or a length mismatch terminates the
+ * program.
+ */
+unsigned char *write_ste_binary(u_int32_t * len_ste)
+{
+    unsigned char *buf, *ptr;
+    struct acm_ste_policy_buffer *ste_header;
+    struct ssid_entry *e;
+    u_int32_t len;
+    int i;
+
+    if (!have_ste)
+        return NULL;
+
+    len = sizeof(struct acm_ste_policy_buffer) +
+        sizeof(type_t) * max_ste_types * max_ste_ssids;
+
+    buf = malloc(len);
+    if (!buf)
+    {
+        /* this is the ste buffer; the message used to blame chwall */
+        printf("ERROR: out of memory allocating ste buffer.\n");
+        exit(EXIT_FAILURE);
+    }
+    ptr = buf;
+
+    /* fill buffer */
+    ste_header = (struct acm_ste_policy_buffer *) buf;
+    ste_header->policy_version = htonl(ACM_STE_VERSION);
+    ste_header->policy_code = htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
+    ste_header->ste_max_types = htonl(max_ste_types);
+    ste_header->ste_max_ssidrefs = htonl(max_ste_ssids);
+    ste_header->ste_ssid_offset =
+        htonl(sizeof(struct acm_ste_policy_buffer));
+
+    ptr += sizeof(struct acm_ste_policy_buffer);
+
+    /* types: references share the row of the ssid they point to */
+    for (e = ste_ssid_head.tqh_first; e != NULL; e = e->entries.tqe_next)
+    {
+        if (e->is_ref)
+            continue;
+
+        for (i = 0; i < max_ste_types; i++)
+            ((type_t *) ptr)[i] = htons((type_t) e->row[i]);
+
+        ptr += sizeof(type_t) * max_ste_types;
+    }
+
+    /* sanity check: exactly the computed number of bytes must be filled */
+    if ((ptr - buf) != len)
+    {
+        printf("ERROR: wrong lengths in %s.\n", __func__);
+        exit(EXIT_FAILURE);
+    }
+    (*len_ste) = len;
+    return buf;
+}
+
+/*
+ * Write the binary policy file: the common acm_policy_buffer header
+ * followed by the primary and then the secondary policy component.
+ * All header fields are stored in network byte order.
+ *
+ * Returns 0 on success and -EIO if the file cannot be opened, written
+ * completely or closed.  The file descriptor and the component buffers
+ * are released on every path.
+ */
+int write_binary(char *filename)
+{
+    struct acm_policy_buffer header;
+    unsigned char *ste_buffer = NULL, *chwall_buffer = NULL;
+    u_int32_t len;
+    u_int32_t len_ste = 0, len_chwall = 0; /* length of policy components */
+    int fd;
+    int ret = -EIO;
+
+    /* open binary file; open() signals failure with -1, 0 is a valid fd */
+    if ((fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC,
+                   S_IRUSR | S_IWUSR)) < 0)
+        return -EIO;
+
+    ste_buffer = write_ste_binary(&len_ste);
+    chwall_buffer = write_chwall_binary(&len_chwall);
+
+    /* the header is written to disk as raw bytes: start from zeroes so
+     * that no stack garbage ends up in the file */
+    memset(&header, 0, sizeof(header));
+
+    /* determine primary component (default chwall) */
+    header.policy_version = htonl(ACM_POLICY_VERSION);
+    header.magic = htonl(ACM_MAGIC);
+
+    len = sizeof(struct acm_policy_buffer);
+    if (have_chwall)
+        len += len_chwall;
+    if (have_ste)
+        len += len_ste;
+    header.len = htonl(len);
+
+    header.primary_buffer_offset = htonl(sizeof(struct acm_policy_buffer));
+    if (primary == CHWALL)
+    {
+        header.primary_policy_code = htonl(ACM_CHINESE_WALL_POLICY);
+        header.secondary_buffer_offset =
+            htonl((sizeof(struct acm_policy_buffer)) + len_chwall);
+    }
+    else if (primary == STE)
+    {
+        header.primary_policy_code =
+            htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
+        header.secondary_buffer_offset =
+            htonl((sizeof(struct acm_policy_buffer)) + len_ste);
+    }
+    else
+    {
+        /* null policy: both components start right behind the header.
+         * primary_buffer_offset already is in network byte order, so it
+         * must be copied, not converted a second time */
+        header.primary_policy_code = htonl(ACM_NULL_POLICY);
+        header.secondary_buffer_offset = header.primary_buffer_offset;
+    }
+
+    if (secondary == CHWALL)
+        header.secondary_policy_code = htonl(ACM_CHINESE_WALL_POLICY);
+    else if (secondary == STE)
+        header.secondary_policy_code =
+            htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
+    else
+        header.secondary_policy_code = htonl(ACM_NULL_POLICY);
+
+    if (write(fd, (void *) &header, sizeof(struct acm_policy_buffer))
+        != (ssize_t) sizeof(struct acm_policy_buffer))
+        goto out;
+
+    /* write primary policy component (NULL POLICY has no policy data) */
+    if (primary == CHWALL)
+    {
+        if (write(fd, chwall_buffer, len_chwall) != (ssize_t) len_chwall)
+            goto out;
+    }
+    else if (primary == STE)
+    {
+        if (write(fd, ste_buffer, len_ste) != (ssize_t) len_ste)
+            goto out;
+    }
+
+    /* write secondary policy component (NULL POLICY has no policy data) */
+    if (secondary == CHWALL)
+    {
+        if (write(fd, chwall_buffer, len_chwall) != (ssize_t) len_chwall)
+            goto out;
+    }
+    else if (secondary == STE)
+    {
+        if (write(fd, ste_buffer, len_ste) != (ssize_t) len_ste)
+            goto out;
+    }
+
+    ret = 0;
+ out:
+    /* a failing close can mean that data never reached the disk */
+    if (close(fd) != 0)
+        ret = -EIO;
+    free(ste_buffer);
+    free(chwall_buffer);
+    return ret;
+}
+
+/*
+ * Validate 'doc' against the XML schema in SCHEMA_FILENAME.
+ *
+ * Returns 1 if the document is valid and 0 otherwise; a schema that
+ * cannot be loaded or parsed also counts as "not valid".
+ */
+int is_valid(xmlDocPtr doc)
+{
+    int err = 0;
+    xmlSchemaPtr schema_ctxt = NULL;
+    xmlSchemaParserCtxtPtr schemaparser_ctxt = NULL;
+    xmlSchemaValidCtxtPtr schemavalid_ctxt = NULL;
+
+    /* each step can fail (missing or broken schema file, out of memory);
+     * stop at the first failure instead of handing NULL to the next one */
+    schemaparser_ctxt = xmlSchemaNewParserCtxt(SCHEMA_FILENAME);
+    if (!schemaparser_ctxt)
+    {
+        printf("ERROR: Cannot create parser for schema file %s\n",
+               SCHEMA_FILENAME);
+        err = -EIO;
+        goto out;
+    }
+
+    schema_ctxt = xmlSchemaParse(schemaparser_ctxt);
+    if (!schema_ctxt)
+    {
+        printf("ERROR: Cannot parse schema file %s\n", SCHEMA_FILENAME);
+        err = -EIO;
+        goto out;
+    }
+
+    schemavalid_ctxt = xmlSchemaNewValidCtxt(schema_ctxt);
+    if (!schemavalid_ctxt)
+    {
+        printf("ERROR: Cannot create validation context for schema %s\n",
+               SCHEMA_FILENAME);
+        err = -EIO;
+        goto out;
+    }
+
+#ifdef VALIDATE_SCHEMA
+    /* only tested to be available from libxml2-2.6.20 upwards */
+    if ((err = xmlSchemaIsValid(schemavalid_ctxt)) != 1)
+    {
+        printf("ERROR: Invalid schema file %s (err=%d)\n",
+               SCHEMA_FILENAME, err);
+        err = -EIO;
+        goto out;
+    }
+    else
+        printf("XML Schema %s valid.\n", SCHEMA_FILENAME);
+#endif
+    /* xmlSchemaValidateDoc() returns 0 for a valid document */
+    if ((err = xmlSchemaValidateDoc(schemavalid_ctxt, doc)))
+    {
+        err = -EIO;
+        goto out;
+    }
+ out:
+    /* NOTE(review): relies on the libxml2 free functions accepting NULL,
+     * as the original cleanup already did - confirm for the targeted
+     * libxml2 version */
+    xmlSchemaFreeValidCtxt(schemavalid_ctxt);
+    xmlSchemaFreeParserCtxt(schemaparser_ctxt);
+    xmlSchemaFree(schema_ctxt);
+    return (err != 0) ? 0 : 1;
+}
+
+/*
+ * Translate the XML policy and label template named by argv[1] into the
+ * binary policy file and the ascii mapping file.
+ *
+ * All file names are derived from the policy name:
+ *   POLICY_SUBDIR <name> "/" <name> <extension>
+ *
+ * Returns EXIT_SUCCESS if both output files were written and
+ * EXIT_FAILURE otherwise.
+ */
+int main(int argc, char **argv)
+{
+    xmlDocPtr labeldoc = NULL;
+    xmlDocPtr policydoc = NULL;
+
+    int err = EXIT_SUCCESS;
+
+    char *file_prefix;
+    int prefix_len;
+
+    if (ACM_POLICY_VERSION != WRITTEN_AGAINST_ACM_POLICY_VERSION)
+    {
+        printf("ERROR: This program was written against an older ACM version.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (argc != 2)
+        usage(basename(argv[0]));
+
+    prefix_len = strlen(POLICY_SUBDIR) +
+        strlen(argv[1]) + 1 /* "/" */  +
+        strlen(argv[1]) + 1 /* terminating NUL */ ;
+
+    file_prefix = malloc(prefix_len);
+    policy_filename = malloc(prefix_len + strlen(POLICY_EXTENSION));
+    label_filename = malloc(prefix_len + strlen(LABEL_EXTENSION));
+    binary_filename = malloc(prefix_len + strlen(BINARY_EXTENSION));
+    mapping_filename = malloc(prefix_len + strlen(MAPPING_EXTENSION));
+
+    if (!file_prefix || !policy_filename || !label_filename ||
+        !binary_filename || !mapping_filename)
+    {
+        printf("ERROR allocating file name memory.\n");
+        err = EXIT_FAILURE;
+        goto out2;
+    }
+
+    /* create input/output filenames out of prefix; malloc'ed memory is
+     * uninitialized, so the first piece must be copied, not appended */
+    strcpy(file_prefix, POLICY_SUBDIR);
+    strcat(file_prefix, argv[1]);
+    strcat(file_prefix, "/");
+    strcat(file_prefix, argv[1]);
+
+    strcpy(policy_filename, file_prefix);
+    strcpy(label_filename, file_prefix);
+    strcpy(binary_filename, file_prefix);
+    strcpy(mapping_filename, file_prefix);
+
+    strcat(policy_filename, POLICY_EXTENSION);
+    strcat(label_filename, LABEL_EXTENSION);
+    strcat(binary_filename, BINARY_EXTENSION);
+    strcat(mapping_filename, MAPPING_EXTENSION);
+
+    labeldoc = xmlParseFile(label_filename);
+
+    if (labeldoc == NULL)
+    {
+        /* name the file that failed, not just the policy name */
+        printf("Error: could not parse file %s.\n", label_filename);
+        err = EXIT_FAILURE;
+        goto out2;
+    }
+
+    printf("Validating label file %s...\n", label_filename);
+    if (!is_valid(labeldoc))
+    {
+        printf("ERROR: Failed schema-validation for file %s (err=%d)\n",
+               label_filename, err);
+        err = EXIT_FAILURE;
+        goto out1;
+    }
+
+    policydoc = xmlParseFile(policy_filename);
+
+    if (policydoc == NULL)
+    {
+        printf("Error: could not parse file %s.\n", policy_filename);
+        err = EXIT_FAILURE;
+        goto out1;
+    }
+
+    printf("Validating policy file %s...\n", policy_filename);
+
+    if (!is_valid(policydoc))
+    {
+        printf("ERROR: Failed schema-validation for file %s (err=%d)\n",
+               policy_filename, err);
+        err = EXIT_FAILURE;
+        goto out;
+    }
+
+    /* Init queues and parse policy */
+    create_type_mapping(policydoc);
+
+    /* create ssids */
+    create_ssid_mapping(labeldoc);
+
+    /* write label mapping file */
+    if (write_mapping(mapping_filename))
+    {
+        printf("ERROR: writing mapping file %s.\n", mapping_filename);
+        err = EXIT_FAILURE;
+        goto out;
+    }
+
+    /* write binary file */
+    if (write_binary(binary_filename))
+    {
+        printf("ERROR: writing binary file %s.\n", binary_filename);
+        err = EXIT_FAILURE;
+        goto out;
+    }
+
+    /* write stats */
+    if (have_chwall)
+    {
+        printf("Max chwall labels: %u\n", max_chwall_labels);
+        printf("Max chwall-types: %u\n", max_chwall_types);
+        printf("Max chwall-ssids: %u\n", max_chwall_ssids);
+    }
+
+    if (have_ste)
+    {
+        printf("Max ste labels: %u\n", max_ste_labels);
+        printf("Max ste-types: %u\n", max_ste_types);
+        printf("Max ste-ssids: %u\n", max_ste_ssids);
+    }
+    /* cleanup */
+ out:
+    xmlFreeDoc(policydoc);
+ out1:
+    xmlFreeDoc(labeldoc);
+ out2:
+    xmlCleanupParser();
+    return err;
+}
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/secpol_xml2bin.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/secpol_xml2bin.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,139 @@
+/****************************************************************
+ * secpol_xml2bin.h
+ *
+ * Copyright (C) 2005 IBM Corporation
+ *
+ * Authors:
+ * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ */
+#define POLICY_SUBDIR "policies/"
+#define POLICY_EXTENSION "-security_policy.xml"
+#define LABEL_EXTENSION "-security_label_template.xml"
+#define BINARY_EXTENSION ".bin"
+#define MAPPING_EXTENSION ".map"
+#define PRIMARY_COMPONENT_ATTR_NAME "order"
+#define BOOTSTRAP_LABEL_ATTR_NAME "bootstrap"
+#define PRIMARY_COMPONENT "PrimaryPolicyComponent"
+#define SCHEMA_FILENAME "policies/security_policy.xsd"
+
+/* basic states (used as 1 << X) */
+#define XML2BIN_SECPOL 0 /* policy tokens */
+#define XML2BIN_STE 1
+#define XML2BIN_CHWALL 2
+#define XML2BIN_CONFLICTSETS 3
+#define XML2BIN_CSTYPE 4
+
+#define XML2BIN_SECTEMPLATE 5 /* label tokens */
+#define XML2BIN_POLICYHEADER 6
+#define XML2BIN_LABELHEADER 7
+#define XML2BIN_SUBJECTS 8
+#define XML2BIN_OBJECTS 9
+#define XML2BIN_VM 10
+#define XML2BIN_RES 11
+
+#define XML2BIN_STETYPES 12 /* shared tokens */
+#define XML2BIN_CHWALLTYPES 13
+#define XML2BIN_TYPE 14
+#define XML2BIN_NAME 15
+#define XML2BIN_TEXT 16
+#define XML2BIN_COMMENT 17
+
+/* type "data type" (currently 16bit) */
+typedef u_int16_t type_t;
+
+/* list of known elements and token equivalent *
+ * state constants and token positions must be *
+ * in sync for correct state recognition:      *
+ * token[X] is the element name belonging to   *
+ * the XML2BIN_* constant with value X         */
+
+char *token[20] = /* parser triggers */
+{
+ [0] = "SecurityPolicyDefinition", /* policy xml */
+ [1] = "SimpleTypeEnforcement",
+ [2] = "ChineseWall",
+ [3] = "ConflictSets",
+ [4] = "Conflict",
+ [5] = "SecurityLabelTemplate", /* label-template xml */
+ [6] = "PolicyHeader",
+ [7] = "LabelHeader",
+ [8] = "SubjectLabels",
+ [9] = "ObjectLabels",
+ [10] = "VirtualMachineLabel",
+ [11] = "ResourceLabel",
+ [12] = "SimpleTypeEnforcementTypes", /* common tags */
+ [13] = "ChineseWallTypes",
+ [14] = "Type",
+ [15] = "Name",
+ [16] = "text",
+ [17] = "comment",
+ [18] = NULL, /* end-of-table marker */
+};
+
+/* important combined states */
+#define XML2BIN_NULL 0
+
+/* policy xml parsing states _S */
+
+/* e.g., here we are in a <secpol,ste,stetypes> environment, *
+ * so when finding a type element, we know where to put it */
+#define XML2BIN_stetype_S ((1 << XML2BIN_SECPOL) | \
+ (1 << XML2BIN_STE) | \
+ (1 << XML2BIN_STETYPES))
+
+#define XML2BIN_chwalltype_S ((1 << XML2BIN_SECPOL) | \
+ (1 << XML2BIN_CHWALL) | \
+ (1 << XML2BIN_CHWALLTYPES))
+
+#define XML2BIN_conflictset_S ((1 << XML2BIN_SECPOL) | \
+ (1 << XML2BIN_CHWALL) | \
+ (1 << XML2BIN_CONFLICTSETS))
+
+#define XML2BIN_conflictsettype_S ((1 << XML2BIN_SECPOL) | \
+ (1 << XML2BIN_CHWALL) | \
+ (1 << XML2BIN_CONFLICTSETS) | \
+ (1 << XML2BIN_CSTYPE))
+
+
+/* label xml states */
+#define XML2BIN_VM_S ((1 << XML2BIN_SECTEMPLATE) | \
+ (1 << XML2BIN_SUBJECTS) | \
+ (1 << XML2BIN_VM))
+
+#define XML2BIN_RES_S ((1 << XML2BIN_SECTEMPLATE) | \
+ (1 << XML2BIN_OBJECTS) | \
+ (1 << XML2BIN_RES))
+
+#define XML2BIN_VM_STE_S ((1 << XML2BIN_SECTEMPLATE) | \
+ (1 << XML2BIN_SUBJECTS) | \
+ (1 << XML2BIN_VM) | \
+ (1 << XML2BIN_STETYPES))
+
+#define XML2BIN_VM_CHWALL_S ((1 << XML2BIN_SECTEMPLATE) | \
+ (1 << XML2BIN_SUBJECTS) | \
+ (1 << XML2BIN_VM) | \
+ (1 << XML2BIN_CHWALLTYPES))
+
+#define XML2BIN_RES_STE_S ((1 << XML2BIN_SECTEMPLATE) | \
+ (1 << XML2BIN_OBJECTS) | \
+ (1 << XML2BIN_RES) | \
+ (1 << XML2BIN_STETYPES))
+
+
+
+/* check versions of headers against which the
+ * xml2bin translation tool was written
+ */
+
+/* protects from unnoticed changes in struct acm_policy_buffer */
+#define WRITTEN_AGAINST_ACM_POLICY_VERSION 1
+
+/* protects from unnoticed changes in struct acm_chwall_policy_buffer */
+#define WRITTEN_AGAINST_ACM_CHWALL_VERSION 1
+
+/* protects from unnoticed changes in struct acm_ste_policy_buffer */
+#define WRITTEN_AGAINST_ACM_STE_VERSION 1
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/setlabel.sh
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/setlabel.sh Thu Aug 25 22:53:20 2005
@@ -0,0 +1,345 @@
+#!/bin/sh
+# *
+# * setlabel
+# *
+# * Copyright (C) 2005 IBM Corporation
+# *
+# * Authors:
+# * Stefan Berger <stefanb@xxxxxxxxxx>
+# *
+# * This program is free software; you can redistribute it and/or
+# * modify it under the terms of the GNU General Public License as
+# * published by the Free Software Foundation, version 2 of the
+# * License.
+# *
+# * 'setlabel' labels virtual machine (domain) configuration files with
+# * security identifiers that can be enforced in Xen.
+# *
+# * 'setlabel -?' shows the usage of the program
+# *
+# * 'setlabel -l vmconfig-file' lists all available labels (only VM
+# * labels are used right now)
+# *
+# * 'setlabel vmconfig-file security-label map-file' inserts the 'ssidref'
+# * that corresponds to the security-label under the
+# * current policy (if policy changes, 'label'
+# * must be re-run over the configuration files;
+# * map-file is created during policy translation and
+# * is found in the policy's directory
+#
+
+# The script uses bash features but is started through /bin/sh: re-execute
+# it once under bash.  'runbash' is exported so that the second run skips
+# this block.  "$0" and "$@" are passed on quoted so that arguments
+# containing blanks or shell meta characters survive the re-execution.
+if [ -z "$runbash" ]; then
+    runbash="1"
+    export runbash
+    exec bash "$0" "$@"
+fi
+
+
+# Print the command line synopsis of this script to stdout.
+usage ()
+{
+    printf '%s\n' \
+        "Usage: $0 [Option] <vmfile> <label> <policy name> " \
+        " or $0 -l <policy name>" \
+        "" \
+        "Valid Options are:" \
+        "-r : to relabel a file without being prompted" \
+        "" \
+        "vmfile : XEN vm configuration file" \
+        "label : the label to map" \
+        "policy name : the name of the policy, i.e. 'chwall'" \
+        "" \
+        "-l <policy name> is used to show valid labels in the map file" \
+        ""
+}
+
+
+findMapFile ()
+{
+ mapfile="./$1.map"
+ if [ -r "$mapfile" ]; then
+ return 1
+ fi
+
+ mapfile="./policies/$1/$1.map"
+ if [ -r "$mapfile" ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+showLabels ()
+{
+ mapfile=$1
+ if [ ! -r "$mapfile" -o "$mapfile" == "" ]; then
+ echo "Cannot read from vm configuration file $vmfile."
+ return -1
+ fi
+
+ getPrimaryPolicy $mapfile
+ getSecondaryPolicy $mapfile
+
+ echo "The following labels are available:"
+ let line=1
+ while [ 1 ]; do
+ ITEM=`cat $mapfile | \
+ awk -vline=$line \
+ -vprimary=$primary \
+ '{ \
+ if ($1 == "LABEL->SSID" && \
+ $2 == "VM" && \
+ $3 == primary ) { \
+ ctr++; \
+ if (ctr == line) { \
+ print $4; \
+ } \
+ } \
+ } END { \
+ }'`
+
+ if [ "$ITEM" == "" ]; then
+ break
+ fi
+ if [ "$secondary" != "NULL" ]; then
+ LABEL=`cat $mapfile | \
+ awk -vitem=$ITEM \
+ '{
+ if ($1 == "LABEL->SSID" && \
+ $2 == "VM" && \
+ $3 == "CHWALL" && \
+ $4 == item ) { \
+ result = item; \
+ } \
+ } END { \
+ print result \
+ }'`
+ else
+ LABEL=$ITEM
+ fi
+
+ if [ "$LABEL" != "" ]; then
+ echo "$LABEL"
+ found=1
+ fi
+ let line=line+1
+ done
+ if [ "$found" != "1" ]; then
+ echo "No labels found."
+ fi
+}
+
+getPrimaryPolicy ()
+{
+ mapfile=$1
+ primary=`cat $mapfile | \
+ awk ' \
+ { \
+ if ( $1 == "PRIMARY" ) { \
+ res=$2; \
+ } \
+ } END { \
+ print res; \
+ } '`
+}
+
+getSecondaryPolicy ()
+{
+ mapfile=$1
+ secondary=`cat $mapfile | \
+ awk ' \
+ { \
+ if ( $1 == "SECONDARY" ) { \
+ res=$2; \
+ } \
+ } END { \
+ print res; \
+ } '`
+}
+
+
+getDefaultSsid ()
+{
+ mapfile=$1
+ pol=$2
+ RES=`cat $mapfile \
+ awk -vpol=$pol \
+ { \
+ if ($1 == "LABEL->SSID" && \
+ $2 == "ANY" && \
+ $3 == pol && \
+ $4 == "DEFAULT" ) {\
+ res=$5; \
+ } \
+ } END { \
+ printf "%04x", strtonum(res) \
+ }'`
+ echo "default NULL mapping is $RES"
+ defaultssid=$RES
+}
+
+relabel ()
+{
+ vmfile=$1
+ label=$2
+ mapfile=$3
+ mode=$4
+
+ if [ ! -r "$vmfile" ]; then
+ echo "Cannot read from vm configuration file $vmfile."
+ return -1
+ fi
+
+ if [ ! -w "$vmfile" ]; then
+ echo "Cannot write to vm configuration file $vmfile."
+ return -1
+ fi
+
+ if [ ! -r "$mapfile" ] ; then
+ echo "Cannot read mapping file $mapfile."
+ return -1
+ fi
+
+ # Determine which policy is primary, which sec.
+ getPrimaryPolicy $mapfile
+ getSecondaryPolicy $mapfile
+
+ # Calculate the primary policy's SSIDREF
+ if [ "$primary" == "NULL" ]; then
+ SSIDLO="0000"
+ else
+ SSIDLO=`cat $mapfile | \
+ awk -vlabel=$label \
+ -vprimary=$primary \
+ '{ \
+ if ( $1 == "LABEL->SSID" && \
+ $2 == "VM" && \
+ $3 == primary && \
+ $4 == label ) { \
+ result=$5 \
+ } \
+ } END { \
+ if (result != "" ) \
+ {printf "%04x", strtonum(result)}\
+ }'`
+ fi
+
+ # Calculate the secondary policy's SSIDREF
+ if [ "$secondary" == "NULL" ]; then
+ SSIDHI="0000"
+ else
+ SSIDHI=`cat $mapfile | \
+ awk -vlabel=$label \
+ -vsecondary=$secondary \
+ '{ \
+ if ( $1 == "LABEL->SSID" && \
+ $2 == "VM" && \
+ $3 == secondary && \
+ $4 == label ) { \
+ result=$5 \
+ } \
+ } END { \
+ if (result != "" ) \
+ {printf "%04x", strtonum(result)}\
+ }'`
+ fi
+
+ if [ "$SSIDLO" == "" -o \
+ "$SSIDHI" == "" ]; then
+ echo "Could not map the given label '$label'."
+ return -1
+ fi
+
+ ACM_POLICY=`cat $mapfile | \
+ awk ' { if ( $1 == "POLICY" ) { \
+ result=$2 \
+ } \
+ } \
+ END { \
+ if (result != "") { \
+ printf result \
+ } \
+ }'`
+
+ if [ "$ACM_POLICY" == "" ]; then
+ echo "Could not find 'POLICY' entry in map file."
+ return -1
+ fi
+
+ SSIDREF="0x$SSIDHI$SSIDLO"
+
+ if [ "$mode" != "relabel" ]; then
+ RES=`cat $vmfile | \
+ awk '{ \
+ if ( substr($1,0,7) == "ssidref" ) {\
+ print $0; \
+ } \
+ }'`
+ if [ "$RES" != "" ]; then
+ echo "Do you want to overwrite the existing mapping
($RES)? (y/N)"
+ read user
+ if [ "$user" != "y" -a "$user" != "Y" ]; then
+ echo "Aborted."
+ return 0
+ fi
+ fi
+ fi
+
+ #Write the output
+ vmtmp1="/tmp/__setlabel.tmp1"
+ vmtmp2="/tmp/__setlabel.tmp2"
+ touch $vmtmp1
+ touch $vmtmp2
+ if [ ! -w "$vmtmp1" -o ! -w "$vmtmp2" ]; then
+ echo "Cannot create temporary files. Aborting."
+ return -1
+ fi
+ RES=`sed -e '/^#ACM_POLICY/d' $vmfile > $vmtmp1`
+ RES=`sed -e '/^#ACM_LABEL/d' $vmtmp1 > $vmtmp2`
+ RES=`sed -e '/^ssidref/d' $vmtmp2 > $vmtmp1`
+ echo "#ACM_POLICY=$ACM_POLICY" >> $vmtmp1
+ echo "#ACM_LABEL=$label" >> $vmtmp1
+ echo "ssidref = $SSIDREF" >> $vmtmp1
+ mv -f $vmtmp1 $vmfile
+ rm -rf $vmtmp1 $vmtmp2
+ echo "Mapped label '$label' to ssidref '$SSIDREF'."
+}
+
+
+
+if [ "$1" == "-r" ]; then
+ mode="relabel"
+ shift
+elif [ "$1" == "-l" ]; then
+ mode="show"
+ shift
+elif [ "$1" == "-?" ]; then
+ mode="usage"
+fi
+
+if [ "$mode" == "show" ]; then
+ if [ "$1" == "" ]; then
+ usage
+ exit -1;
+ fi
+ findMapFile $1
+ res=$?
+ if [ "$res" != "0" ]; then
+ showLabels $mapfile
+ else
+ echo "Could not find map file for policy '$1'."
+ fi
+elif [ "$mode" == "usage" ]; then
+ usage
+else
+ if [ "$3" == "" ]; then
+ usage
+ exit -1;
+ fi
+ findMapFile $3
+ res=$?
+ if [ "$res" != "0" ]; then
+ relabel $1 $2 $mapfile $mode
+ else
+ echo "Could not find map file for policy '$3'."
+ fi
+
+fi
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/updategrub.sh
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/security/updategrub.sh Thu Aug 25 22:53:20 2005
@@ -0,0 +1,171 @@
+#!/bin/sh
+# *
+# * updategrub
+# *
+# * Copyright (C) 2005 IBM Corporation
+# *
+# * Authors:
+# * Stefan Berger <stefanb@xxxxxxxxxx>
+# *
+# * This program is free software; you can redistribute it and/or
+# * modify it under the terms of the GNU General Public License as
+# * published by the Free Software Foundation, version 2 of the
+# * License.
+# *
+# *
+#
+
+# Re-run under bash; the exported 'runbash' flag prevents a re-exec loop.
+if [ -z "$runbash" ]; then
+    runbash="1"
+    export runbash
+    exec bash "$0" "$@"
+    exit
+fi
+
+# Print the command-line synopsis and a description of both arguments.
+usage ()
+{
+    echo "Usage: $0 <policy name> <root of xen repository>"
+    echo ""
+    echo "<policy name> : The name of the policy, e.g. xen_null"
+    echo "<root of xen repository> : The root of the XEN repository."
+    echo ""
+}
+
+# Set the global variable 'linux' to a pattern for the file name of the
+# xen0 Linux kernel: built from UTS_RELEASE in a linux-*-xen0 tree below
+# the directory given as $1, or a generic vmlinuz pattern as last resort.
+getLinuxVersion ()
+{
+    path=$1
+    linux=""
+    # Start empty so a stale or inherited 'lnx' cannot be picked up.
+    lnx=""
+    for f in "$path"/linux-*-xen0 ; do
+        versionfile=$f/include/linux/version.h
+        if [ -r "$versionfile" ]; then
+            lnx=`grep UTS_RELEASE "$versionfile" | \
+                awk '{ \
+                    len=length($3); \
+                    print substr($3,2,len-2) }'`
+        fi
+        if [ "$lnx" != "" ]; then
+            linux="[./0-9a-zA-Z]*$lnx"
+            return;
+        fi
+    done
+    #Last resort.
+    linux="vmlinuz-2.[45678].[0-9]*[.0-9]*-xen0$"
+}
+
+#Locate the grub configuration file; its path is stored in the global
+#variable 'grubconf'. /boot/grub/grub.conf is the only place checked.
+#Returns 1 if that file is writable and 0 if it is not.
+findGrubConf()
+{
+    grubconf="/boot/grub/grub.conf"
+    # Bail out with 0 ("not found") unless the file is writable.
+    [ -w $grubconf ] || return 0
+    return 1
+}
+
+
+#Update the grub configuration file in place.
+#For every boot entry that loads Xen together with the given Linux
+#kernel, replace the existing policy module line or add a new one.
+#
+#Arguments passed to this function
+# 1st : the grub configuration file
+# 2nd : the binary policy file name
+# 3rd : the name or pattern of the linux kernel name to match
+#
+#An entry is recognized if, under its title, a 'kernel' line whose
+#second item ends with "xen.gz" (e.g. kernel /xen.gz dom0_mem=....)
+#is followed by a 'module' line matching the 3rd parameter.
+#
+updateGrub ()
+{
+    grubconf=$1
+    policyfile=$2
+    linux=$3
+
+    tmpfile=`mktemp "$grubconf.XXXXXX"`
+    if [ -z "$tmpfile" ]; then
+        echo "Could not create temporary file! Aborting."
+        exit -1
+    fi
+    awk -v policy="$policyfile" \
+        -v linux="$linux" '{ \
+        if ( $1 == "title" ) { \
+            kernelfound = 0; \
+            if ( policymaycome == 1 ){ \
+                printf ("\tmodule %s%s\n", path, policy); \
+            } \
+            policymaycome = 0; \
+        } \
+        else if ( $1 == "kernel" ) { \
+            if ( match($2,"xen.gz$") ) { \
+                path=substr($2,1,RSTART-1); \
+                kernelfound = 1; \
+            } \
+        } \
+        else if ( $1 == "module" && \
+                  kernelfound == 1 && \
+                  match($2,linux) ) { \
+            policymaycome = 1; \
+        } \
+        else if ( $1 == "module" && \
+                  kernelfound == 1 && \
+                  policymaycome == 1 && \
+                  match($2,"[0-9a-zA-Z]*.bin$") ) { \
+            printf ("\tmodule %s%s\n", path, policy); \
+            policymaycome = 0; \
+            kernelfound = 0; \
+            dontprint = 1; \
+        } \
+        else if ( $1 == "" && \
+                  kernelfound == 1 && \
+                  policymaycome == 1) { \
+            dontprint = 1; \
+        } \
+        if (dontprint == 0) { \
+            printf ("%s\n", $0); \
+        } \
+        dontprint = 0; \
+    } END { \
+        if ( policymaycome == 1 ) { \
+            printf ("\tmodule %s%s\n", path, policy); \
+        } \
+    }' < "$grubconf" > "$tmpfile"
+    if [ $? -ne 0 -o ! -s "$tmpfile" ]; then
+        echo "Could not write the new grub configuration! Aborting."
+        rm -f "$tmpfile"
+        exit -1
+    fi
+    mv -f "$tmpfile" "$grubconf"
+}
+
+# Main: "-?" prints the usage; otherwise both arguments are required.
+if [ "$1" == "-?" ]; then
+    usage
+    exit 0
+fi
+if [ "$1" == "" -o "$2" == "" ]; then
+    usage
+    exit -1
+fi
+
+policy=$1
+policyfile=$policy.bin
+
+getLinuxVersion "$2"
+
+# findGrubConf returns 0 when no writable grub.conf was found.
+if findGrubConf; then
+    echo "Could not find grub.conf. Aborting."
+    exit -1
+fi
+
+# Quoted so the shell does not glob-expand the kernel pattern in $linux.
+updateGrub "$grubconf" "$policyfile" "$linux"
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/Makefile
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/Makefile Thu Aug 25 22:53:20 2005
@@ -0,0 +1,13 @@
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# Subdirectories, visited in this order: libxenstat first, then xentop.
+SUBDIRS := libxenstat xentop
+
+.PHONY: all install clean
+
+# Run the requested goal in each subdirectory; 'set -e' stops at the first failure.
+all install clean:
+	@set -e; for subdir in $(SUBDIRS); do \
+		$(MAKE) -C $$subdir $@; \
+	done
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/COPYING
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/COPYING Thu Aug 25 22:53:20 2005
@@ -0,0 +1,510 @@
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes a de-facto standard. To achieve this, non-free programs must
+be allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at least
+ three years, to give the same user the materials specified in
+ Subsection 6a, above, for a charge no more than the cost of
+ performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the library,
+if necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James
+ Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/Makefile
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/Makefile Thu Aug 25 22:53:20 2005
@@ -0,0 +1,142 @@
+# libxenstat: statistics-collection library for Xen
+# Copyright (C) International Business Machines Corp., 2005
+# Author: Josh Triplett <josht@xxxxxxxxxx>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# Location of the top of the Xen tree, relative to this directory; the shared
+# tools rules are pulled in from there.
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+LINUX_ROOT := $(XEN_ROOT)/linux-2.6-xen-sparse
+
+# Installation commands.  Both variants pass -D to install.
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755 -D
+INSTALL_DATA = $(INSTALL) -m0644 -D
+
+# Installation directories (combined with $(DESTDIR) by the install rules).
+prefix=/usr
+includedir=$(prefix)/include
+libdir=$(prefix)/lib
+
+LDCONFIG=ldconfig
+MAKE_LINK=ln -sf
+
+# Library version: MAJOR goes into the soname, MAJOR.MINOR into the file name
+# of the shared object.
+MAJOR=0
+MINOR=0
+
+# Build products.  SHLIB_LINKS are the symlinks created next to the shared
+# object (soname link and unversioned development link).
+LIB=src/libxenstat.a
+SHLIB=src/libxenstat.so.$(MAJOR).$(MINOR)
+SHLIB_LINKS=src/libxenstat.so.$(MAJOR) src/libxenstat.so
+OBJECTS=src/xenstat.o src/xen-interface.o
+SONAME_FLAGS=-Wl,-soname -Wl,libxenstat.so.$(MAJOR)
+
+# Warning flags are kept separate from CFLAGS; only the two object rules for
+# the library sources use them.
+WARN_FLAGS=-Wall -Werror
+
+# Header search path: the library's own headers, the hypervisor's public
+# interface headers, and the Linux-side public headers.
+# NOTE(review): privcmd.h is presumably found via the linux-public directory;
+# confirm against the tree layout.
+CFLAGS+=-Isrc
+CFLAGS+=-I$(XEN_ROOT)/xen/include/public
+CFLAGS+=-I$(LINUX_ROOT)/include/asm-xen/linux-public/
+LDFLAGS+=-Lsrc
+
+# "all" and "install" name actions rather than files; declare them phony so a
+# stray file with either name cannot mask them.
+.PHONY: all install
+
+# Default target: build the static library.
+all: $(LIB)
+
+# Static archive of the library objects.
+$(LIB): $(OBJECTS)
+	$(AR) rc $@ $^
+	$(RANLIB) $@
+
+# Versioned shared object, tagged with the soname libxenstat.so.$(MAJOR).
+$(SHLIB): $(OBJECTS)
+	$(CC) $(LDFLAGS) $(SONAME_FLAGS) -shared -o $@ $(OBJECTS)
+
+src/xenstat.o: src/xenstat.c src/xenstat.h src/xen-interface.h
+	$(CC) $(CFLAGS) $(WARN_FLAGS) -c -o $@ $<
+
+src/xen-interface.o: src/xen-interface.c src/xen-interface.h
+	$(CC) $(CFLAGS) $(WARN_FLAGS) -c -o $@ $<
+
+# soname link -> versioned shared object.  $(<F) is the file part of the
+# first prerequisite, so the link is relative to src/.  The prerequisite must
+# be $(SHLIB): depending on $(LIB) here would make the ".so" link point at
+# the static archive.
+src/libxenstat.so.$(MAJOR): $(SHLIB)
+	$(MAKE_LINK) $(<F) $@
+
+# Unversioned development link -> soname link.
+src/libxenstat.so: src/libxenstat.so.$(MAJOR)
+	$(MAKE_LINK) $(<F) $@
+
+# Installation of the library itself is currently disabled; the intended
+# recipe is kept below for reference.
+install: all
+#install: all
+# $(INSTALL_DATA) src/xenstat.h $(DESTDIR)$(includedir)/xenstat.h
+# $(INSTALL_PROG) $(LIB) $(DESTDIR)$(libdir)/libxenstat.a
+# $(INSTALL_PROG) $(SHLIB) \
+# $(DESTDIR)$(libdir)/libxenstat.so.$(MAJOR).$(MINOR)
+# $(MAKE_LINK) libxenstat.so.$(MAJOR).$(MINOR) \
+# $(DESTDIR)$(libdir)/libxenstat.so.$(MAJOR)
+# $(MAKE_LINK) libxenstat.so.$(MAJOR) \
+# $(DESTDIR)$(libdir)/libxenstat.so
+# -$(LDCONFIG)
+
+# Files produced for the optional SWIG language bindings.
+PYLIB=bindings/swig/python/_xenstat.so
+PYMOD=bindings/swig/python/xenstat.py
+PYSRC=bindings/swig/python/_xenstat.c
+PERLLIB=bindings/swig/perl/xenstat.so
+PERLMOD=bindings/swig/perl/xenstat.pm
+PERLSRC=bindings/swig/perl/xenstat.c
+BINDINGS=$(PYLIB) $(PYMOD) $(PERLLIB) $(PERLMOD)
+BINDINGSRC=$(PYSRC) $(PERLSRC)
+
+# These targets name actions, not files.
+.PHONY: all-bindings install-bindings
+.PHONY: python-bindings install-python-bindings
+.PHONY: perl-bindings install-perl-bindings
+
+# The all-bindings target builds all the language bindings
+all-bindings: perl-bindings python-bindings
+
+# The install-bindings target installs all the language bindings
+install-bindings: install-perl-bindings install-python-bindings
+
+# Every binding links against the shared library (via -lxenstat and the
+# -Lsrc search path) and wraps the public header.
+$(BINDINGS): $(SHLIB) $(SHLIB_LINKS) src/xenstat.h
+
+SWIG_FLAGS=-module xenstat -Isrc
+
+# Python bindings
+PYTHON_VERSION=2.3
+PYTHON_FLAGS=-I/usr/include/python$(PYTHON_VERSION) -lpython$(PYTHON_VERSION)
+$(PYSRC) $(PYMOD): bindings/swig/xenstat.i
+	swig -python $(SWIG_FLAGS) -outdir $(@D) -o $(PYSRC) $<
+
+$(PYLIB): $(PYSRC)
+	$(CC) $(CFLAGS) $(LDFLAGS) $(PYTHON_FLAGS) -shared -lxenstat -o $@ $<
+
+python-bindings: $(PYLIB) $(PYMOD)
+
+pythonlibdir=$(prefix)/lib/python$(PYTHON_VERSION)/site-packages
+install-python-bindings: $(PYLIB) $(PYMOD)
+	$(INSTALL_PROG) $(PYLIB) $(DESTDIR)$(pythonlibdir)/_xenstat.so
+	$(INSTALL_PROG) $(PYMOD) $(DESTDIR)$(pythonlibdir)/xenstat.py
+
+# Opt-in: hook the Python bindings into the default build and install.
+ifeq ($(XENSTAT_PYTHON_BINDINGS),y)
+all: python-bindings
+install: install-python-bindings
+endif
+
+# Perl bindings
+# The compiler flags are queried from perl's own Config module when the
+# recipe runs (the backquotes are expanded by the shell, not by make); "$$"
+# passes a literal "$" through make.  The assignment must stay on one line:
+# there is no line continuation inside the quoted perl snippet.
+PERL_FLAGS=`perl -MConfig -e 'print "$$Config{ccflags} -I$$Config{archlib}/CORE";'`
+$(PERLSRC) $(PERLMOD): bindings/swig/xenstat.i
+	swig -perl $(SWIG_FLAGS) -outdir $(@D) -o $(PERLSRC) $<
+
+$(PERLLIB): $(PERLSRC)
+	$(CC) $(CFLAGS) $(LDFLAGS) $(PERL_FLAGS) -shared -lxenstat -o $@ $<
+
+perl-bindings: $(PERLLIB) $(PERLMOD)
+
+perllibdir=$(prefix)/lib/perl5
+perlmoddir=$(prefix)/share/perl5
+install-perl-bindings: $(PERLLIB) $(PERLMOD)
+	$(INSTALL_PROG) $(PERLLIB) $(DESTDIR)$(perllibdir)/xenstat.so
+	$(INSTALL_PROG) $(PERLMOD) $(DESTDIR)$(perlmoddir)/xenstat.pm
+
+# Opt-in: hook the Perl bindings into the default build and install.
+ifeq ($(XENSTAT_PERL_BINDINGS),y)
+all: perl-bindings
+install: install-perl-bindings
+endif
+
+# Remove everything this Makefile can build.  Declared phony so that a file
+# named "clean" cannot make the target look up to date.
+.PHONY: clean
+clean:
+	rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS) \
+	      $(BINDINGS) $(BINDINGSRC)
diff -r 5f1ed597f107 -r 8799d14bef77
tools/xenstat/libxenstat/bindings/swig/perl/.empty
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/bindings/swig/perl/.empty Thu Aug 25
22:53:20 2005
@@ -0,0 +1,1 @@
+This directory is empty; this file is included to prevent version control
systems from removing the directory.
diff -r 5f1ed597f107 -r 8799d14bef77
tools/xenstat/libxenstat/bindings/swig/python/.empty
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/bindings/swig/python/.empty Thu Aug 25
22:53:20 2005
@@ -0,0 +1,1 @@
+This directory is empty; this file is included to prevent version control
systems from removing the directory.
diff -r 5f1ed597f107 -r 8799d14bef77
tools/xenstat/libxenstat/bindings/swig/xenstat.i
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/bindings/swig/xenstat.i Thu Aug 25 22:53:20 2005
@@ -0,0 +1,8 @@
+/* SWIG interface description for libxenstat: wraps everything declared in
+ * xenstat.h.
+ * NOTE(review): the Makefile invokes swig with "-module xenstat", which
+ * presumably takes precedence over the module name given here - confirm
+ * which name the generated bindings are expected to carry. */
+%module xenstat_swig
+%{
+/* Includes the header in the wrapper code */
+#include "xenstat.h"
+%}
+
+/* Parse the header file to generate wrappers */
+%include "xenstat.h"
diff -r 5f1ed597f107 -r 8799d14bef77
tools/xenstat/libxenstat/src/xen-interface.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/src/xen-interface.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,204 @@
+/* xen-interface.c
+ *
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ * Judy Fischbach <jfisch@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "xen-interface.h"
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "version.h"
+#include "privcmd.h"
+#include "xen.h"
+
+/* Concrete definition of the opaque xi_handle type: it wraps the file
+ * descriptor that xi_init() obtains by opening /proc/xen/privcmd and that
+ * the hypercall helpers pass to ioctl(). */
+struct xi_handle {
+ int fd;
+};
+
+/* Set up the xen-interface layer: allocate a handle and open the
+ * privileged-command device.  Returns the handle for use with the other
+ * xi_* functions, or NULL if the allocation or the open fails (an open
+ * failure is also reported on stderr). */
+xi_handle *xi_init()
+{
+	xi_handle *xi = (xi_handle *)calloc(1, sizeof(*xi));
+
+	if (xi != NULL) {
+		xi->fd = open("/proc/xen/privcmd", O_RDWR);
+		if (xi->fd >= 0)
+			return xi;
+
+		/* Report first, then release: free() must not disturb errno
+		 * before perror() has used it. */
+		perror("Couldn't open /proc/xen/privcmd");
+		free(xi);
+	}
+
+	return NULL;
+}
+
+/* Tear down a handle returned by xi_init(): close the privcmd descriptor
+ * and free the handle itself.  The handle must not be NULL. */
+void xi_uninit(xi_handle *handle)
+{
+	int privcmd_fd = handle->fd;
+
+	close(privcmd_fd);
+	free(handle);
+}
+
+/* Query the Xen version with a single two-entry multicall.
+ *
+ * Entry 0 asks for XENVER_version (its result is stored in *vnum); entry 1
+ * asks for XENVER_extraversion, which the hypervisor writes into *ver.
+ *
+ * Returns 0 on success and -1 on failure.  On failure *vnum is left
+ * untouched.
+ *
+ * NOTE(review): the buffer behind "ver" is not mlock()ed here, unlike the
+ * other memory handed to the hypervisor - confirm whether it needs to be. */
+static int xi_make_xen_version_hypercall(xi_handle *handle, long *vnum,
+					 xen_extraversion_t *ver)
+{
+	privcmd_hypercall_t privcmd;
+	multicall_entry_t multicall[2];
+	int ret = 0;
+
+	/* set up for doing hypercall */
+	privcmd.op = __HYPERVISOR_multicall;
+	privcmd.arg[0] = (unsigned long)multicall;
+	privcmd.arg[1] = 2;
+
+	/* first one to get xen version number */
+	multicall[0].op = __HYPERVISOR_xen_version;
+	multicall[0].args[0] = (unsigned long)XENVER_version;
+
+	/* second to get xen version flag */
+	multicall[1].op = __HYPERVISOR_xen_version;
+	multicall[1].args[0] = (unsigned long)XENVER_extraversion;
+	multicall[1].args[1] = (unsigned long)ver;
+
+	if (mlock(&privcmd, sizeof(privcmd)) < 0) {
+		perror("Failed to mlock privcmd structure");
+		return -1;
+	}
+
+	/* Lock the whole array: both entries are read and written by the
+	 * hypercall, not just the first one. */
+	if (mlock(multicall, sizeof(multicall)) < 0) {
+		perror("Failed to mlock multicall_entry structure");
+		/* Undo the lock that did succeed. */
+		munlock(&privcmd, sizeof(privcmd));
+		return -1;
+	}
+
+	if (ioctl(handle->fd, IOCTL_PRIVCMD_HYPERCALL, &privcmd) < 0) {
+		perror("Hypercall failed");
+		ret = -1;
+	} else {
+		/* Only meaningful once the hypercall has actually run. */
+		*vnum = multicall[0].result;
+	}
+
+	munlock(multicall, sizeof(multicall));
+	munlock(&privcmd, sizeof(privcmd));
+
+	return ret;
+}
+
+/* Issue a dom0_op hypercall through the privcmd device.
+ *
+ * Fills in the command code and interface version of *dom_op, pins both
+ * the request descriptor and *dom_op in memory for the duration of the
+ * call, and unpins them afterwards.  Returns 0 on success, -1 on failure
+ * (each failure is reported on stderr). */
+static int xi_make_dom0_op(xi_handle *handle, dom0_op_t *dom_op,
+			   int dom_opcode)
+{
+	privcmd_hypercall_t call;
+	int status;
+
+	/* Describe the request. */
+	dom_op->cmd = dom_opcode;
+	dom_op->interface_version = DOM0_INTERFACE_VERSION;
+	call.op = __HYPERVISOR_dom0_op;
+	call.arg[0] = (unsigned long)dom_op;
+
+	if (mlock(&call, sizeof(privcmd_hypercall_t)) < 0) {
+		perror("Failed to mlock privcmd structure");
+		return -1;
+	}
+
+	if (mlock(dom_op, sizeof(dom0_op_t)) < 0) {
+		perror("Failed to mlock dom0_op structure");
+		munlock(&call, sizeof(privcmd_hypercall_t));
+		return -1;
+	}
+
+	status = 0;
+	if (ioctl(handle->fd, IOCTL_PRIVCMD_HYPERCALL, &call) < 0) {
+		perror("Hypercall failed");
+		status = -1;
+	}
+
+	munlock(&call, sizeof(privcmd_hypercall_t));
+	munlock(dom_op, sizeof(dom0_op_t));
+
+	return status;
+}
+
+/* Fetch the physical-machine information via DOM0_PHYSINFO and copy it
+ * into *physinfo.  Returns 0 on success, -1 if the hypercall fails (in
+ * which case *physinfo is not written). */
+int xi_get_physinfo(xi_handle *handle, dom0_physinfo_t *physinfo)
+{
+	dom0_op_t request;
+	int rc = xi_make_dom0_op(handle, &request, DOM0_PHYSINFO);
+
+	if (rc >= 0) {
+		*physinfo = request.u.physinfo;
+		return 0;
+	}
+
+	perror("DOM0_PHYSINFO Hypercall failed");
+	return -1;
+}
+
+/* Fetch information about up to max_domains domains, starting at
+ * first_domain, into the caller-supplied array "info" (which must have room
+ * for max_domains entries).
+ *
+ * The array is pinned in memory while the hypercall runs and unpinned again
+ * before returning, on both the success and the failure path.
+ *
+ * Returns the number of entries filled in, or -1 on failure. */
+int xi_get_domaininfolist(xi_handle *handle, dom0_getdomaininfo_t *info,
+			  unsigned int first_domain, unsigned int max_domains)
+{
+	dom0_op_t op;
+	size_t info_len = max_domains * sizeof(dom0_getdomaininfo_t);
+	int ret;
+
+	op.u.getdomaininfolist.first_domain = first_domain;
+	op.u.getdomaininfolist.max_domains = max_domains;
+	op.u.getdomaininfolist.buffer = info;
+
+	if (mlock(info, info_len) < 0) {
+		perror("Failed to mlock domaininfo array");
+		return -1;
+	}
+
+	if (xi_make_dom0_op(handle, &op, DOM0_GETDOMAININFOLIST) < 0) {
+		perror("DOM0_GETDOMAININFOLIST Hypercall failed");
+		ret = -1;
+	} else {
+		ret = op.u.getdomaininfolist.num_domains;
+	}
+
+	/* Balance the mlock() above; without this every call would leave the
+	 * caller's buffer locked. */
+	munlock(info, info_len);
+
+	return ret;
+}
+
+/* Ask the hypervisor (DOM0_GETVCPUCONTEXT, with no context buffer) for the
+ * cpu_time value of one VCPU of one domain.  Returns that value, or -1 if
+ * the hypercall fails. */
+long long xi_get_vcpu_usage(xi_handle *handle, unsigned int domain,
+			    unsigned int vcpu)
+{
+	dom0_op_t request;
+
+	request.u.getvcpucontext.ctxt = NULL;
+	request.u.getvcpucontext.vcpu = vcpu;
+	request.u.getvcpucontext.domain = domain;
+
+	if (xi_make_dom0_op(handle, &request, DOM0_GETVCPUCONTEXT) >= 0)
+		return request.u.getvcpucontext.cpu_time;
+
+	perror("DOM0_GETVCPUCONTEXT Hypercall failed");
+	return -1;
+}
+
+/* Public wrapper around the version multicall: obtains XENVER_version in
+ * *vnum and XENVER_extraversion in *ver.  Returns 0 on success, -1 on
+ * failure. */
+int xi_get_xen_version(xi_handle *handle, long *vnum, xen_extraversion_t *ver)
+{
+	int rc = xi_make_xen_version_hypercall(handle, vnum, ver);
+
+	if (rc >= 0)
+		return 0;
+
+	perror("XEN VERSION Hypercall failed");
+	return -1;
+}
diff -r 5f1ed597f107 -r 8799d14bef77
tools/xenstat/libxenstat/src/xen-interface.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/src/xen-interface.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,53 @@
+/* xen-interface.h
+ *
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ * Judy Fischbach <jfisch@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+/* Include guard: this header defines typedefs, so it must be safe to pull
+ * in more than once from the same translation unit. */
+#ifndef XEN_INTERFACE_H
+#define XEN_INTERFACE_H
+
+#include <stdint.h>
+
+/* Short fixed-width integer aliases; defined before the Xen public headers
+ * below are included.
+ * NOTE(review): presumably those headers rely on these names - confirm. */
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+#include "dom0_ops.h"
+#include "version.h"
+
+/* Opaque handles */
+typedef struct xi_handle xi_handle;
+
+/* Initialize for xen-interface. Returns a handle to be used with subsequent
+ * calls to the xen-interface functions or NULL if an error occurs. */
+xi_handle *xi_init(void);
+
+/* Release a handle obtained from xi_init(), freeing its resources. */
+void xi_uninit(xi_handle *handle);
+
+/* Obtain xen version information from hypervisor.  Returns 0 on success,
+ * -1 on failure. */
+int xi_get_xen_version(xi_handle *, long *vnum, xen_extraversion_t *ver);
+
+/* Obtain physinfo data from dom0.  Returns 0 on success, -1 on failure. */
+int xi_get_physinfo(xi_handle *, dom0_physinfo_t *);
+
+/* Obtain domain data from dom0: fills the given array (third argument is
+ * the first domain, fourth the maximum number of entries).  Returns the
+ * number of entries filled in, or -1 on failure. */
+int xi_get_domaininfolist(xi_handle *, dom0_getdomaininfo_t *, unsigned int,
+			  unsigned int);
+
+/* Returns cpu usage data from dom0 for the given domain and vcpu, or -1 on
+ * failure. */
+long long xi_get_vcpu_usage(xi_handle *, unsigned int, unsigned int);
+
+#endif /* XEN_INTERFACE_H */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/src/xenstat.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/src/xenstat.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,640 @@
+/* libxenstat: statistics-collection library for Xen
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ * Judy Fischbach <jfisch@xxxxxxxxxx>
+ * David Hendricks <dhendrix@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <xen-interface.h>
+#include "xenstat.h"
+#include "version.h"
+
+/*
+ * Types
+ */
+/* Library handle: state that lives across xenstat_get_node() calls. */
+struct xenstat_handle {
+ /* Low-level hypercall handle, created by xi_init() in xenstat_init(). */
+ xi_handle *xihandle;
+ /* Multiplied with page counts to fill in the memory figures. */
+ int page_size;
+ /* Stream for /proc/net/dev; stays NULL (the handle is calloc()ed) until
+ * the network collector opens it on first use. */
+ FILE *procnetdev;
+};
+
+/* Buffer size for the formatted version string: two numbers of up to
+ * SHORT_ASC_LEN digits each, one separator character, the extraversion
+ * string, and one further byte. */
+#define SHORT_ASC_LEN 5 /* length of 65535 */
+#define VERSION_SIZE (2 * SHORT_ASC_LEN + 1 + sizeof(xen_extraversion_t) + 1)
+
+/* Snapshot of the physical machine and all of its domains, as produced by
+ * xenstat_get_node(). */
+struct xenstat_node {
+ /* Bitmask of the XENSTAT_* collectors that were run for this node. */
+ unsigned int flags;
+ unsigned long long cpu_hz;
+ unsigned int num_cpus;
+ /* Memory figures are page counts multiplied by the handle's page_size. */
+ unsigned long long tot_mem;
+ unsigned long long free_mem;
+ unsigned int num_domains;
+ char xen_version[VERSION_SIZE]; /* xen version running on this node */
+ xenstat_domain *domains; /* Array of length num_domains */
+};
+
+/* Per-domain statistics; one entry of xenstat_node.domains. */
+struct xenstat_domain {
+ unsigned int id;
+ /* Raw flags word from the domain info; tested against DOMFLAGS_* by the
+ * xenstat_domain_<state>() predicates. */
+ unsigned int state;
+ unsigned long long cpu_ns;
+ unsigned int num_vcpus;
+ /* NULL until the VCPU collector has filled it in. */
+ xenstat_vcpu *vcpus; /* Array of length num_vcpus */
+ unsigned long long cur_mem; /* Current memory reservation */
+ unsigned long long max_mem; /* Total memory allowed */
+ unsigned int ssid;
+ unsigned int num_networks;
+ /* NULL until the network collector has found an interface for this
+ * domain. */
+ xenstat_network *networks; /* Array of length num_networks */
+};
+
+/* Per-VCPU statistics; "ns" holds the value returned by xi_get_vcpu_usage()
+ * for this VCPU. */
+struct xenstat_vcpu {
+ unsigned long long ns;
+};
+
+/* Counters for one virtual network interface, parsed from the interface's
+ * "vif<domid>.<id>" line in /proc/net/dev. */
+struct xenstat_network {
+ /* The <id> part of the vif<domid>.<id> interface name. */
+ unsigned int id;
+ /* Received */
+ unsigned long long rbytes;
+ unsigned long long rpackets;
+ unsigned long long rerrs;
+ unsigned long long rdrop;
+ /* Transmitted */
+ unsigned long long tbytes;
+ unsigned long long tpackets;
+ unsigned long long terrs;
+ unsigned long long tdrop;
+};
+
+/*
+ * Data-collection types
+ */
+/* Called to collect the information for the node and all the domains on
+ * it. When called, the domain information has already been collected.
+ * Must return 0 on failure and non-zero on success: xenstat_get_node()
+ * frees the node and returns NULL when a collector returns 0. */
+typedef int (*xenstat_collect_func)(xenstat_handle * handle,
+ xenstat_node * node);
+/* Called to free the information collected by the collect function. The free
+ * function will only be called on a xenstat_node if that node includes
+ * information collected by the corresponding collector. Because the node is
+ * flagged before the collect function runs, this also covers a node whose
+ * collection failed part-way, so the free function must cope with partially
+ * filled data. */
+typedef void (*xenstat_free_func)(xenstat_node * node);
+/* Called to free any information stored in the handle. Note the lack of a
+ * matching init function; the collect functions should initialize on first
+ * use. Also, the uninit function must handle the case that the collector has
+ * never been initialized. */
+typedef void (*xenstat_uninit_func)(xenstat_handle * handle);
+/* One optional data collector: the XENSTAT_* flag that selects it plus its
+ * three callbacks. */
+typedef struct xenstat_collector {
+ unsigned int flag;
+ xenstat_collect_func collect;
+ xenstat_free_func free;
+ xenstat_uninit_func uninit;
+} xenstat_collector;
+
+/* Forward declarations of the collector callbacks defined further down. */
+static int xenstat_collect_vcpus(xenstat_handle * handle,
+ xenstat_node * node);
+static int xenstat_collect_networks(xenstat_handle * handle,
+ xenstat_node * node);
+static void xenstat_free_vcpus(xenstat_node * node);
+static void xenstat_free_networks(xenstat_node * node);
+static void xenstat_uninit_vcpus(xenstat_handle * handle);
+static void xenstat_uninit_networks(xenstat_handle * handle);
+
+/* Table of all optional collectors. xenstat_get_node() runs the entries
+ * whose flag is set in its "flags" argument, in table order. */
+static xenstat_collector collectors[] = {
+ { XENSTAT_VCPU, xenstat_collect_vcpus,
+ xenstat_free_vcpus, xenstat_uninit_vcpus },
+ { XENSTAT_NETWORK, xenstat_collect_networks,
+ xenstat_free_networks, xenstat_uninit_networks }
+};
+
+/* Number of entries in the collectors table. */
+#define NUM_COLLECTORS (sizeof(collectors)/sizeof(xenstat_collector))
+
+/*
+ * libxenstat API
+ */
+/* Create a library handle: determine the page size (from a compile-time
+ * constant when one is defined, otherwise from sysconf()) and initialise
+ * the xen-interface layer.  Returns NULL if any step fails. */
+xenstat_handle *xenstat_init()
+{
+	xenstat_handle *xh = (xenstat_handle *) calloc(1, sizeof(*xh));
+
+	if (xh == NULL)
+		return NULL;
+
+#if defined(PAGESIZE)
+	xh->page_size = PAGESIZE;
+#elif defined(PAGE_SIZE)
+	xh->page_size = PAGE_SIZE;
+#else
+	xh->page_size = sysconf(_SC_PAGE_SIZE);
+	if (xh->page_size < 0) {
+		perror("Failed to retrieve page size.");
+		free(xh);
+		return NULL;
+	}
+#endif
+
+	xh->xihandle = xi_init();
+	if (xh->xihandle != NULL)
+		return xh;
+
+	perror("xi_init");
+	free(xh);
+	return NULL;
+}
+
+/* Destroy a handle from xenstat_init(): let every collector drop what it
+ * stored in the handle, shut down the xen-interface layer, then free the
+ * handle.  A NULL handle is ignored. */
+void xenstat_uninit(xenstat_handle * handle)
+{
+	unsigned int which = 0;
+
+	if (handle == NULL)
+		return;
+
+	while (which < NUM_COLLECTORS) {
+		collectors[which].uninit(handle);
+		which++;
+	}
+	xi_uninit(handle->xihandle);
+	free(handle);
+}
+
+/* Take a snapshot of the machine: physical information, Xen version, and
+ * one xenstat_domain per domain, plus the data of every optional collector
+ * selected in "flags" (XENSTAT_* bits).
+ *
+ * Returns a node to be released with xenstat_free_node(), or NULL if any
+ * hypercall, allocation or requested collector fails. */
+xenstat_node *xenstat_get_node(xenstat_handle * handle, unsigned int flags)
+{
+#define DOMAIN_CHUNK_SIZE 256
+	xenstat_node *node;
+	dom0_physinfo_t physinfo;
+	xen_extraversion_t version;
+	long vnum = 0;
+	dom0_getdomaininfo_t domaininfo[DOMAIN_CHUNK_SIZE];
+	unsigned int num_domains, new_domains;
+	unsigned int i;
+
+	/* Create the node */
+	node = (xenstat_node *) calloc(1, sizeof(xenstat_node));
+	if (node == NULL)
+		return NULL;
+
+	/* Get information about the physical system */
+	if (xi_get_physinfo(handle->xihandle, &physinfo) < 0) {
+		free(node);
+		return NULL;
+	}
+
+	/* Get the xen version number and xen version tag */
+	if (xi_get_xen_version(handle->xihandle, &vnum, &version) < 0) {
+		free(node);
+		return NULL;
+	}
+	/* vnum packs two 16-bit numbers: upper half first, lower half
+	 * second. */
+	snprintf(node->xen_version, VERSION_SIZE,
+		 "%ld.%ld%s\n", ((vnum >> 16) & 0xFFFF), vnum & 0xFFFF,
+		 (char *)version);
+
+	node->cpu_hz = ((unsigned long long)physinfo.cpu_khz) * 1000ULL;
+	node->num_cpus =
+	    (physinfo.threads_per_core * physinfo.cores_per_socket *
+	     physinfo.sockets_per_node * physinfo.nr_nodes);
+	node->tot_mem = ((unsigned long long)physinfo.total_pages)
+	    * handle->page_size;
+	node->free_mem = ((unsigned long long)physinfo.free_pages)
+	    * handle->page_size;
+
+	/* malloc(0) is not portable, so allocate a single domain. This will
+	 * be resized below. */
+	node->domains = malloc(sizeof(xenstat_domain));
+	if (node->domains == NULL) {
+		free(node);
+		return NULL;
+	}
+
+	/* Fetch the domain list in chunks until a short chunk comes back.
+	 * NOTE(review): the running count is passed as the "first domain" of
+	 * the next chunk; confirm this is right when domain IDs are not
+	 * contiguous. */
+	num_domains = 0;
+	do {
+		xenstat_domain *domain;
+		int fetched;
+
+		/* Keep the result signed until it has been checked: storing
+		 * the -1 error value in an unsigned count would turn it into
+		 * a huge number of "new" domains. */
+		fetched = xi_get_domaininfolist(handle->xihandle,
+						domaininfo, num_domains,
+						DOMAIN_CHUNK_SIZE);
+		if (fetched < 0) {
+			free(node->domains);
+			free(node);
+			return NULL;
+		}
+		new_domains = (unsigned int)fetched;
+
+		if (new_domains > 0) {
+			/* Grow through a temporary so the old array is not
+			 * lost (and leaked) if realloc() fails. */
+			xenstat_domain *grown =
+			    realloc(node->domains,
+				    (num_domains + new_domains)
+				    * sizeof(xenstat_domain));
+			if (grown == NULL) {
+				free(node->domains);
+				free(node);
+				return NULL;
+			}
+			node->domains = grown;
+		}
+
+		domain = node->domains + num_domains;
+
+		for (i = 0; i < new_domains; i++) {
+			/* Fill in domain using domaininfo[i] */
+			domain->id = domaininfo[i].domain;
+			domain->state = domaininfo[i].flags;
+			domain->cpu_ns = domaininfo[i].cpu_time;
+			domain->num_vcpus = domaininfo[i].n_vcpu;
+			domain->vcpus = NULL;
+			domain->cur_mem =
+			    ((unsigned long long)domaininfo[i].tot_pages)
+			    * handle->page_size;
+			/* UINT_MAX pages is reported as "all ones".  Widen
+			 * before multiplying, as for cur_mem, so the product
+			 * is not truncated to the narrower type. */
+			domain->max_mem =
+			    domaininfo[i].max_pages == UINT_MAX
+			    ? (unsigned long long)-1
+			    : ((unsigned long long)domaininfo[i].max_pages)
+			      * handle->page_size;
+			domain->ssid = domaininfo[i].ssidref;
+			domain->num_networks = 0;
+			domain->networks = NULL;
+
+			domain++;
+		}
+		num_domains += new_domains;
+	} while (new_domains == DOMAIN_CHUNK_SIZE);
+	node->num_domains = num_domains;
+
+	/* Run all the extra data collectors requested */
+	node->flags = 0;
+	for (i = 0; i < NUM_COLLECTORS; i++) {
+		if ((flags & collectors[i].flag) == collectors[i].flag) {
+			/* Flag first, so that xenstat_free_node() also
+			 * releases whatever a failing collector left
+			 * behind. */
+			node->flags |= collectors[i].flag;
+			if(collectors[i].collect(handle, node) == 0) {
+				xenstat_free_node(node);
+				return NULL;
+			}
+		}
+	}
+
+	return node;
+}
+
+/* Release a node from xenstat_get_node(): let every collector that
+ * contributed to it free its data, then free the domain array and the node
+ * itself.  A NULL node is ignored. */
+void xenstat_free_node(xenstat_node * node)
+{
+	int which;
+
+	if (node == NULL)
+		return;
+
+	if (node->domains != NULL) {
+		for (which = 0; which < NUM_COLLECTORS; which++) {
+			const unsigned int bit = collectors[which].flag;
+
+			if ((node->flags & bit) != bit)
+				continue;
+			collectors[which].free(node);
+		}
+		free(node->domains);
+	}
+
+	free(node);
+}
+
+/* Look up a domain by its domain ID.  Returns NULL when the node holds no
+ * domain with that ID. */
+xenstat_domain *xenstat_node_domain(xenstat_node * node, unsigned int domid)
+{
+	unsigned int pos = 0;
+
+	/* FIXME: binary search */
+	/* Linear scan over the node's domain array. */
+	while (pos < node->num_domains) {
+		xenstat_domain *candidate = &(node->domains[pos]);
+
+		if (candidate->id == domid)
+			return candidate;
+		pos++;
+	}
+
+	return NULL;
+}
+
+/* Return the index-th domain of the node (0-based), or NULL when index is
+ * out of range.  Intended for iterating over all domains. */
+xenstat_domain *xenstat_node_domain_by_index(xenstat_node * node,
+					     unsigned int index)
+{
+	/* index is unsigned, so only the upper bound needs checking. */
+	if (index >= node->num_domains)
+		return NULL;
+	return &(node->domains[index]);
+}
+
+/* Formatted Xen version string stored in the node */
+const char *xenstat_node_xen_ver(xenstat_node * node)
+{
+	const char *version = node->xen_version;
+
+	return version;
+}
+
+/* Total memory recorded for the node */
+unsigned long long xenstat_node_tot_mem(xenstat_node * node)
+{
+	const unsigned long long total = node->tot_mem;
+
+	return total;
+}
+
+/* Free memory recorded for the node */
+unsigned long long xenstat_node_free_mem(xenstat_node * node)
+{
+	const unsigned long long unused = node->free_mem;
+
+	return unused;
+}
+
+/* Number of entries in the node's domain array */
+unsigned int xenstat_node_num_domains(xenstat_node * node)
+{
+	const unsigned int count = node->num_domains;
+
+	return count;
+}
+
+/* Number of CPUs recorded for the node */
+unsigned int xenstat_node_num_cpus(xenstat_node * node)
+{
+	const unsigned int count = node->num_cpus;
+
+	return count;
+}
+
+/* CPU speed recorded for the node, in Hz */
+unsigned long long xenstat_node_cpu_hz(xenstat_node * node)
+{
+	const unsigned long long hz = node->cpu_hz;
+
+	return hz;
+}
+
+/* Domain ID of this domain */
+unsigned xenstat_domain_id(xenstat_domain * domain)
+{
+	const unsigned domid = domain->id;
+
+	return domid;
+}
+
+/* CPU time recorded for this domain */
+unsigned long long xenstat_domain_cpu_ns(xenstat_domain * domain)
+{
+	const unsigned long long used = domain->cpu_ns;
+
+	return used;
+}
+
+/* Number of VCPUs recorded for this domain */
+unsigned int xenstat_domain_num_vcpus(xenstat_domain * domain)
+{
+	const unsigned int count = domain->num_vcpus;
+
+	return count;
+}
+
+/* Return the statistics of the given VCPU of a domain, or NULL when the
+ * VCPU number is out of range or no VCPU data is attached to the domain
+ * (the vcpus array stays NULL unless the VCPU collector filled it in). */
+xenstat_vcpu *xenstat_domain_vcpu(xenstat_domain * domain, unsigned int vcpu)
+{
+	/* vcpu is unsigned, so only the upper bound needs checking.  The
+	 * NULL test mirrors xenstat_domain_network() and avoids handing out
+	 * a pointer computed from a NULL array. */
+	if (domain->vcpus == NULL || vcpu >= domain->num_vcpus)
+		return NULL;
+	return &(domain->vcpus[vcpu]);
+}
+
+/* Current memory reservation of this domain */
+unsigned long long xenstat_domain_cur_mem(xenstat_domain * domain)
+{
+	const unsigned long long current = domain->cur_mem;
+
+	return current;
+}
+
+/* Maximum memory reservation of this domain */
+unsigned long long xenstat_domain_max_mem(xenstat_domain * domain)
+{
+	const unsigned long long limit = domain->max_mem;
+
+	return limit;
+}
+
+/* SSID of this domain */
+unsigned int xenstat_domain_ssid(xenstat_domain * domain)
+{
+	const unsigned int ssid = domain->ssid;
+
+	return ssid;
+}
+
+/* Domain state predicates: each returns 1 when the corresponding condition
+ * holds in the domain's state word and 0 otherwise. */
+unsigned int xenstat_domain_dying(xenstat_domain * domain)
+{
+	const unsigned int state = domain->state;
+
+	return (state & DOMFLAGS_DYING) == DOMFLAGS_DYING;
+}
+
+/* Shut down, and the shutdown reason is a crash */
+unsigned int xenstat_domain_crashed(xenstat_domain * domain)
+{
+	const unsigned int state = domain->state;
+
+	if ((state & DOMFLAGS_SHUTDOWN) != DOMFLAGS_SHUTDOWN)
+		return 0;
+	return ((state >> DOMFLAGS_SHUTDOWNSHIFT) & DOMFLAGS_SHUTDOWNMASK)
+	    == SHUTDOWN_crash;
+}
+
+/* Shut down for any reason other than a crash */
+unsigned int xenstat_domain_shutdown(xenstat_domain * domain)
+{
+	const unsigned int state = domain->state;
+
+	if ((state & DOMFLAGS_SHUTDOWN) != DOMFLAGS_SHUTDOWN)
+		return 0;
+	return ((state >> DOMFLAGS_SHUTDOWNSHIFT) & DOMFLAGS_SHUTDOWNMASK)
+	    != SHUTDOWN_crash;
+}
+
+unsigned int xenstat_domain_paused(xenstat_domain * domain)
+{
+	const unsigned int state = domain->state;
+
+	return (state & DOMFLAGS_PAUSED) == DOMFLAGS_PAUSED;
+}
+
+unsigned int xenstat_domain_blocked(xenstat_domain * domain)
+{
+	const unsigned int state = domain->state;
+
+	return (state & DOMFLAGS_BLOCKED) == DOMFLAGS_BLOCKED;
+}
+
+unsigned int xenstat_domain_running(xenstat_domain * domain)
+{
+	const unsigned int state = domain->state;
+
+	return (state & DOMFLAGS_RUNNING) == DOMFLAGS_RUNNING;
+}
+
+/* Number of network interfaces recorded for this domain */
+unsigned int xenstat_domain_num_networks(xenstat_domain * domain)
+{
+	const unsigned int count = domain->num_networks;
+
+	return count;
+}
+
+/* Return the network statistics entry with the given index (0-based) for a
+ * domain, or NULL when the index is out of range or the domain has no
+ * network data attached. */
+xenstat_network *xenstat_domain_network(xenstat_domain * domain,
+					unsigned int network)
+{
+	/* network is unsigned, so only the upper bound needs checking. */
+	if (domain->networks == NULL || network >= domain->num_networks)
+		return NULL;
+	return &(domain->networks[network]);
+}
+
+/*
+ * VCPU functions
+ */
+/* Collect information about VCPUs: for every domain of the node, allocate
+ * its vcpus array and query the usage of each VCPU.
+ *
+ * Returns 1 on success and 0 on failure (allocation or hypercall).  On
+ * failure, arrays allocated so far stay attached to their domains and are
+ * released by xenstat_free_vcpus(). */
+static int xenstat_collect_vcpus(xenstat_handle * handle, xenstat_node * node)
+{
+	unsigned int i, vcpu;
+	/* Fill in VCPU information */
+	for (i = 0; i < node->num_domains; i++) {
+		xenstat_domain *domain = &(node->domains[i]);
+
+		/* malloc(0) is not portable and may return NULL without
+		 * being out of memory; a domain without VCPUs simply keeps
+		 * its vcpus pointer NULL. */
+		if (domain->num_vcpus == 0)
+			continue;
+
+		domain->vcpus = malloc(domain->num_vcpus
+				       * sizeof(xenstat_vcpu));
+		if (domain->vcpus == NULL)
+			return 0;
+
+		for (vcpu = 0; vcpu < domain->num_vcpus; vcpu++) {
+			/* FIXME: need to be using a more efficient mechanism*/
+			long long vcpu_time;
+			vcpu_time =
+			    xi_get_vcpu_usage(handle->xihandle,
+					      domain->id,
+					      vcpu);
+			/* A negative value is the error return. */
+			if (vcpu_time < 0)
+				return 0;
+			domain->vcpus[vcpu].ns = vcpu_time;
+		}
+	}
+	return 1;
+}
+
+/* Release the vcpus array of every domain in the node */
+static void xenstat_free_vcpus(xenstat_node * node)
+{
+	unsigned int pos = 0;
+
+	while (pos < node->num_domains) {
+		free(node->domains[pos].vcpus);
+		pos++;
+	}
+}
+
+/* The VCPU collector keeps no state in the handle, so there is nothing to
+ * release here */
+static void xenstat_uninit_vcpus(xenstat_handle * handle)
+{
+	(void)handle;
+}
+
+/* Usage value recorded for this VCPU */
+unsigned long long xenstat_vcpu_ns(xenstat_vcpu * vcpu)
+{
+	const unsigned long long used = vcpu->ns;
+
+	return used;
+}
+
+/*
+ * Network functions
+ */
+
+/* Expected format of /proc/net/dev: the two header lines, which the network
+ * collector compares byte-for-byte (strcmp) against what it reads from the
+ * file before trusting the column layout.
+ * NOTE(review): the runs of padding between the column titles look as if
+ * they have been collapsed to single spaces; since the comparison is exact,
+ * verify this string against the header the running kernel really prints. */
+static const char PROCNETDEV_HEADER[] =
+ "Inter-| Receive |"
+ " Transmit\n"
+ " face |bytes packets errs drop fifo frame compressed multicast|"
+ "bytes packets errs drop fifo colls carrier compressed\n";
+
+/* Collect information about networks: parse the "vif<domid>.<id>" lines of
+ * /proc/net/dev and append one xenstat_network per interface to the owning
+ * domain.
+ *
+ * The file is opened and its header validated on first use; the stream is
+ * then kept in the handle and re-read from just after the header on every
+ * call.  If the file cannot be opened or does not have the expected header,
+ * the problem is reported on stderr and the function returns 1 without
+ * collecting anything, so the node is still usable, just without network
+ * data.
+ *
+ * Returns 0 only when memory for a network entry cannot be allocated;
+ * entries collected up to that point stay attached to their domains. */
+static int xenstat_collect_networks(xenstat_handle * handle,
+				    xenstat_node * node)
+{
+	/* Open and validate /proc/net/dev if we haven't already */
+	if (handle->procnetdev == NULL) {
+		char header[sizeof(PROCNETDEV_HEADER)];
+		FILE *fp = fopen("/proc/net/dev", "r");
+		if (fp == NULL) {
+			perror("Error opening /proc/net/dev");
+			return 1;
+		}
+
+		/* Validate the format of /proc/net/dev */
+		if (fread(header, sizeof(PROCNETDEV_HEADER) - 1, 1,
+			  fp) != 1) {
+			perror("Error reading /proc/net/dev header");
+			fclose(fp);
+			return 1;
+		}
+		header[sizeof(PROCNETDEV_HEADER) - 1] = '\0';
+		if (strcmp(header, PROCNETDEV_HEADER) != 0) {
+			fprintf(stderr,
+				"Unexpected /proc/net/dev format\n");
+			fclose(fp);
+			return 1;
+		}
+
+		/* Cache the stream only once it has been validated;
+		 * otherwise the next call would skip the check and parse a
+		 * file in an unknown format. */
+		handle->procnetdev = fp;
+	}
+
+	/* Fill in networks */
+	/* FIXME: optimize this */
+	fseek(handle->procnetdev, sizeof(PROCNETDEV_HEADER) - 1, SEEK_SET);
+	while (1) {
+		xenstat_domain *domain;
+		xenstat_network *grown;
+		xenstat_network net;
+		unsigned int domid;
+		/* The first group of counters on a line goes into the t*
+		 * fields and the second group into the r* fields.
+		 * NOTE(review): presumably because what the vif receives is
+		 * what the domain transmitted - confirm. */
+		int ret = fscanf(handle->procnetdev,
+				 "vif%u.%u:%llu%llu%llu%llu%*u%*u%*u%*u"
+				 "%llu%llu%llu%llu%*u%*u%*u%*u\n",
+				 &domid, &net.id,
+				 &net.tbytes, &net.tpackets, &net.terrs,
+				 &net.tdrop,
+				 &net.rbytes, &net.rpackets, &net.rerrs,
+				 &net.rdrop);
+		if (ret == EOF)
+			break;
+		if (ret != 10) {
+			/* Not a vif line: skip the rest of it.  fgetc()
+			 * returns an int so that EOF can be told apart from
+			 * every character value. */
+			int c;
+			do {
+				c = fgetc(handle->procnetdev);
+			} while (c != '\n' && c != EOF);
+			if (c == EOF)
+				break;
+			continue;
+		}
+
+		/* FIXME: this does a search for the domid */
+		domain = xenstat_node_domain(node, domid);
+		if (domain == NULL) {
+			fprintf(stderr,
+				"Found interface vif%u.%u but domain %u"
+				" does not exist.\n", domid, net.id,
+				domid);
+			continue;
+		}
+
+		/* Grow through a temporary: on failure the existing array
+		 * and its count stay consistent and are released later by
+		 * xenstat_free_networks().  realloc() on a NULL pointer
+		 * behaves like malloc(), which covers the first entry. */
+		grown = realloc(domain->networks,
+				(domain->num_networks + 1)
+				* sizeof(xenstat_network));
+		if (grown == NULL)
+			return 0;
+		domain->networks = grown;
+		domain->networks[domain->num_networks] = net;
+		domain->num_networks++;
+	}
+
+	return 1;
+}
+
+/* Release the networks array of every domain in the node */
+static void xenstat_free_networks(xenstat_node * node)
+{
+	unsigned int pos = 0;
+
+	while (pos < node->num_domains) {
+		free(node->domains[pos].networks);
+		pos++;
+	}
+}
+
+/* Close the /proc/net/dev stream kept in the handle, if the network
+ * collector ever opened it */
+static void xenstat_uninit_networks(xenstat_handle * handle)
+{
+	FILE *stream = handle->procnetdev;
+
+	if (stream != NULL)
+		fclose(stream);
+}
+
+/* Interface ID of this network */
+unsigned int xenstat_network_id(xenstat_network * network)
+{
+	const unsigned int ifid = network->id;
+
+	return ifid;
+}
+
+/* Receive counter: bytes */
+unsigned long long xenstat_network_rbytes(xenstat_network * network)
+{
+	const unsigned long long value = network->rbytes;
+
+	return value;
+}
+
+/* Receive counter: packets */
+unsigned long long xenstat_network_rpackets(xenstat_network * network)
+{
+	const unsigned long long value = network->rpackets;
+
+	return value;
+}
+
+/* Receive counter: errors */
+unsigned long long xenstat_network_rerrs(xenstat_network * network)
+{
+	const unsigned long long value = network->rerrs;
+
+	return value;
+}
+
+/* Receive counter: drops */
+unsigned long long xenstat_network_rdrop(xenstat_network * network)
+{
+	const unsigned long long value = network->rdrop;
+
+	return value;
+}
+
+/* Transmit counter: bytes */
+unsigned long long xenstat_network_tbytes(xenstat_network * network)
+{
+	const unsigned long long value = network->tbytes;
+
+	return value;
+}
+
+/* Transmit counter: packets */
+unsigned long long xenstat_network_tpackets(xenstat_network * network)
+{
+	const unsigned long long value = network->tpackets;
+
+	return value;
+}
+
+/* Transmit counter: errors */
+unsigned long long xenstat_network_terrs(xenstat_network * network)
+{
+	const unsigned long long value = network->terrs;
+
+	return value;
+}
+
+/* Transmit counter: dropped packets */
+unsigned long long xenstat_network_tdrop(xenstat_network * network)
+{
+	const unsigned long long value = network->tdrop;
+
+	return value;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/src/xenstat.h
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/src/xenstat.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,150 @@
+/* libxenstat: statistics-collection library for Xen
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ * Judy Fischbach <jfisch@xxxxxxxxxx>
+ * David Hendricks <dhendrix@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+/* libxenstat API */
+
+/* Opaque handles */
+typedef struct xenstat_handle xenstat_handle;
+typedef struct xenstat_domain xenstat_domain;
+typedef struct xenstat_node xenstat_node;
+typedef struct xenstat_vcpu xenstat_vcpu;
+typedef struct xenstat_network xenstat_network;
+
+/* Initialize the xenstat library. Returns a handle to be used with
+ * subsequent calls to the xenstat library, or NULL if an error occurs.
+ * Declared with (void) so that C compilers treat this as a real prototype
+ * and reject calls that pass arguments. */
+xenstat_handle *xenstat_init(void);
+
+/* Release the handle to libxc, free resources, etc. */
+void xenstat_uninit(xenstat_handle * handle);
+
+/* Get all available information about a node */
+#define XENSTAT_VCPU 0x1
+#define XENSTAT_NETWORK 0x2
+#define XENSTAT_ALL (XENSTAT_VCPU|XENSTAT_NETWORK)
+xenstat_node *xenstat_get_node(xenstat_handle * handle, unsigned int flags);
+
+/* Free the information */
+void xenstat_free_node(xenstat_node * node);
+
+/*
+ * Node functions - extract information from a xenstat_node
+ */
+
+/* Get information about the domain with the given domain ID */
+xenstat_domain *xenstat_node_domain(xenstat_node * node,
+ unsigned int domid);
+
+/* Get the domain with the given index; used to loop over all domains. */
+xenstat_domain *xenstat_node_domain_by_index(xenstat_node * node,
+ unsigned index);
+/* Get xen version of the node */
+const char *xenstat_node_xen_ver(xenstat_node * node);
+
+/* Get amount of total memory on a node */
+unsigned long long xenstat_node_tot_mem(xenstat_node * node);
+
+/* Get amount of free memory on a node */
+unsigned long long xenstat_node_free_mem(xenstat_node * node);
+
+/* Find the number of domains existing on a node */
+unsigned int xenstat_node_num_domains(xenstat_node * node);
+
+/* Find the number of CPUs existing on a node */
+unsigned int xenstat_node_num_cpus(xenstat_node * node);
+
+/* Get information about the CPU speed */
+unsigned long long xenstat_node_cpu_hz(xenstat_node * node);
+
+/*
+ * Domain functions - extract information from a xenstat_domain
+ */
+
+/* Get the domain ID for this domain */
+unsigned xenstat_domain_id(xenstat_domain * domain);
+
+/* Get information about how much CPU time has been used */
+unsigned long long xenstat_domain_cpu_ns(xenstat_domain * domain);
+
+/* Find the number of VCPUs allocated to a domain */
+unsigned int xenstat_domain_num_vcpus(xenstat_domain * domain);
+
+/* Get the VCPU handle to obtain VCPU stats */
+xenstat_vcpu *xenstat_domain_vcpu(xenstat_domain * domain,
+ unsigned int vcpu);
+
+/* Find the current memory reservation for this domain */
+unsigned long long xenstat_domain_cur_mem(xenstat_domain * domain);
+
+/* Find the maximum memory reservation for this domain */
+unsigned long long xenstat_domain_max_mem(xenstat_domain * domain);
+
+/* Find the domain's SSID */
+unsigned int xenstat_domain_ssid(xenstat_domain * domain);
+
+/* Get domain states */
+unsigned int xenstat_domain_dying(xenstat_domain * domain);
+unsigned int xenstat_domain_crashed(xenstat_domain * domain);
+unsigned int xenstat_domain_shutdown(xenstat_domain * domain);
+unsigned int xenstat_domain_paused(xenstat_domain * domain);
+unsigned int xenstat_domain_blocked(xenstat_domain * domain);
+unsigned int xenstat_domain_running(xenstat_domain * domain);
+
+/* Get the number of networks for a given domain */
+unsigned int xenstat_domain_num_networks(xenstat_domain *);
+
+/* Get the network handle to obtain network stats */
+xenstat_network *xenstat_domain_network(xenstat_domain * domain,
+ unsigned int network);
+
+/*
+ * VCPU functions - extract information from a xenstat_vcpu
+ */
+
+/* Get VCPU usage */
+unsigned long long xenstat_vcpu_ns(xenstat_vcpu * vcpu);
+
+
+/*
+ * Network functions - extract information from a xenstat_network
+ */
+
+/* Get the ID for this network */
+unsigned int xenstat_network_id(xenstat_network * network);
+
+/* Get the number of receive bytes for this network */
+unsigned long long xenstat_network_rbytes(xenstat_network * network);
+
+/* Get the number of receive packets for this network */
+unsigned long long xenstat_network_rpackets(xenstat_network * network);
+
+/* Get the number of receive errors for this network */
+unsigned long long xenstat_network_rerrs(xenstat_network * network);
+
+/* Get the number of receive drops for this network */
+unsigned long long xenstat_network_rdrop(xenstat_network * network);
+
+/* Get the number of transmit bytes for this network */
+unsigned long long xenstat_network_tbytes(xenstat_network * network);
+
+/* Get the number of transmit packets for this network */
+unsigned long long xenstat_network_tpackets(xenstat_network * network);
+
+/* Get the number of transmit errors for this network */
+unsigned long long xenstat_network_terrs(xenstat_network * network);
+
+/* Get the number of transmit drops for this network */
+unsigned long long xenstat_network_tdrop(xenstat_network * network);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/xentop/Makefile
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/xentop/Makefile Thu Aug 25 22:53:20 2005
@@ -0,0 +1,44 @@
+# Copyright (C) International Business Machines Corp., 2005
+# Author: Josh Triplett <josht@xxxxxxxxxx>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; under version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# all, install and clean are commands rather than files; declaring them
+# phony keeps a stray file with one of these names from masking the target.
+# xentop is a real build product in the enabled branch, so it is not phony.
+.PHONY: all install clean
+
+# When xentop is not enabled, the usual targets exist but do nothing.
+ifneq ($(XENSTAT_XENTOP),y)
+all install xentop:
+else
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755 -D
+INSTALL_DATA = $(INSTALL) -m0644 -D
+
+prefix=/usr
+mandir=$(prefix)/share/man
+man1dir=$(mandir)/man1
+sbindir=$(prefix)/sbin
+
+CFLAGS += -DGCC_PRINTF -Wall -Werror -I$(XEN_LIBXENSTAT)
+LDFLAGS += -L$(XEN_LIBXENSTAT)
+LDLIBS += -lxenstat -lncurses
+
+all: xentop
+
+# Compiled and linked by make's built-in rules using the flags above.
+xentop: xentop.o
+
+install: xentop xentop.1
+	$(INSTALL_PROG) xentop $(DESTDIR)$(sbindir)/xentop
+	$(INSTALL_DATA) xentop.1 $(DESTDIR)$(man1dir)/xentop.1
+
+endif
+
+clean:
+	rm -f xentop xentop.o
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/xentop/TODO
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/xentop/TODO Thu Aug 25 22:53:20 2005
@@ -0,0 +1,34 @@
+Display error messages on the help line after bad input at a prompt.
+Fractional delay times
+Use prompting to search for domains
+Better line editing?
+
+* Make CPU in % more accurate
+* Domain total network TX % and RX %
+
+Like Top, f feature, field select of domain columns, toggle the display of
+field by typing the letter associated with field, if displayed it shows in
+bold and the letter is Capitalized along with a leading asterisk for the
+field, if not selected for display letter is lowercase, no leading asterisk
+and field is not bolded.
+
+Like Top, ordering of domain columns, o feature Capital letter shifts left,
+lowercase letter shifts right?
+
+Color
+Full management: pause, destroy, create domains
+
+Add support for Virtual Block Devices (vbd)
+
+To think about:
+Support for more than one node display (distributed monitoring
+from any node of all other nodes in a cluster)
+Bottom line option (Switch node, Search node [tab completion?])
+
+Capture/Logging of resource information generated during a time interval.
+-b batch mode dump snapshots to standard output (used with -n)
+-n number of iterations to dump to standard output (unlimited if not specified)
+-d monitor DomIDs as -dD1,-dD2 or -dD1,D2...
+ Monitor only domains with specified domain IDs
+-m monitor nodeIDs as -mN1,-mN2 or -mN1,N2...
+ Monitor only domains with specified node IDs
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/xentop/xentop.1
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/xentop/xentop.1 Thu Aug 25 22:53:20 2005
@@ -0,0 +1,88 @@
+.\" Copyright (C) International Business Machines Corp., 2005
+.\" Author: Josh Triplett <josht@xxxxxxxxxx>
+.\"
+.\" This program is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; under version 2 of the License.
+.\"
+.\" This program is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with this program; if not, write to the Free Software
+.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+.TH xentop 1 "August 2005"
+.SH NAME
+\fBxentop\fR \- displays real-time information about a Xen system and domains
+
+.SH SYNOPSIS
+.B xentop
+[\fB\-h\fR]
+[\fB\-V\fR]
+[\fB\-d\fRSECONDS]
+[\fB\-n\fR]
+[\fB\-r\fR]
+[\fB\-v\fR]
+
+.SH DESCRIPTION
+\fBxentop\fR displays information about the Xen system and domains, in a
+continually-updating manner. Command-line options and interactive commands
+can change the detail and format of the information displayed by \fBxentop\fR.
+
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display help and exit
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+output version information and exit
+.TP
+\fB\-d\fR, \fB\-\-delay\fR=\fISECONDS\fR
+seconds between updates (default 3)
+.TP
+\fB\-n\fR, \fB\-\-networks\fR
+output network information
+.TP
+\fB\-r\fR, \fB\-\-repeat\-header\fR
+repeat table header before each domain
+.TP
+\fB\-v\fR, \fB\-\-vcpus\fR
+output VCPU data
+
+.SH "INTERACTIVE COMMANDS"
+All interactive commands are case-insensitive.
+.TP
+.B D
+set delay between updates
+.TP
+.B N
+toggle display of network information
+.TP
+.B Q, Esc
+quit
+.TP
+.B R
+toggle table header before each domain
+.TP
+.B S
+cycle sort order
+.TP
+.B V
+toggle display of VCPU information
+.TP
+.B Arrows
+scroll domain display
+
+.SH AUTHORS
+Written by Judy Fischbach, David Hendricks, and Josh Triplett
+
+.SH "REPORTING BUGS"
+Report bugs to <dsteklof@xxxxxxxxxx>.
+
+.SH COPYRIGHT
+Copyright \(co 2005 International Business Machines Corp
+.br
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/xentop/xentop.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/xentop/xentop.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,876 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2005
+ * Author(s): Judy Fischbach <jfisch@xxxxxxxxxx>
+ * David Hendricks <dhendrix@xxxxxxxxxx>
+ * Josh Triplett <josht@xxxxxxxxxx>
+ * based on code from Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/* Feature-test macros only take effect when defined before the first
+ * system header is included, so this must stay at the top. */
+#define _GNU_SOURCE
+#include <curses.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdarg.h>	/* va_list, va_start, va_end used by print() */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <xenstat.h>
+
+#define XENTOP_VERSION "1.0"
+
+#define XENTOP_DISCLAIMER \
+"Copyright (C) 2005 International Business Machines Corp\n"\
+"This is free software; see the source for copying conditions. There is NO\n"\
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
+#define XENTOP_BUGSTO "Report bugs to <dsteklof@xxxxxxxxxx>.\n"
+
+/* Let the format(printf) annotation on print() compile away on compilers
+ * that do not understand __attribute__. */
+#if !defined(__GNUC__) && !defined(__GNUG__)
+#define __attribute__(arg) /* empty */
+#endif
+
+#define KEY_ESCAPE '\x1B'
+
+/*
+ * Function prototypes
+ */
+/* Utility functions */
+static void usage(const char *);
+static void version(void);
+static void cleanup(void);
+static void fail(const char *);
+static int current_row(void);
+static int lines(void);
+static void print(const char *, ...) __attribute__((format(printf,1,2)));
+static void attr_addstr(int attr, const char *str);
+static void set_delay(char *value);
+static void set_prompt(char *new_prompt, void (*func)(char *));
+static int handle_key(int);
+static int compare(unsigned long long, unsigned long long);
+static int compare_domains(xenstat_domain **, xenstat_domain **);
+static unsigned long long tot_net_bytes( xenstat_domain *, int);
+
+/* Field functions */
+static int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_domid(xenstat_domain *domain);
+static int compare_state(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_state(xenstat_domain *domain);
+static int compare_cpu(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_cpu(xenstat_domain *domain);
+static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_cpu_pct(xenstat_domain *domain);
+static int compare_mem(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_mem(xenstat_domain *domain);
+static void print_mem_pct(xenstat_domain *domain);
+static int compare_maxmem(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_maxmem(xenstat_domain *domain);
+static void print_max_pct(xenstat_domain *domain);
+static int compare_vcpus(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_vcpus(xenstat_domain *domain);
+static int compare_nets(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_nets(xenstat_domain *domain);
+static int compare_net_tx(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_net_tx(xenstat_domain *domain);
+static int compare_net_rx(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_net_rx(xenstat_domain *domain);
+static int compare_ssid(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_ssid(xenstat_domain *domain);
+
+/* Section printing functions */
+static void do_summary(void);
+static void do_header(void);
+static void do_bottom_line(void);
+static void do_domain(xenstat_domain *);
+static void do_vcpu(xenstat_domain *);
+static void do_network(xenstat_domain *);
+static void top(void);
+
+/* Field types */
+/* Identifies one column of the domain table.  These values are used as
+ * indexes into fields[] (compare_domains reads fields[sort_field]), so the
+ * order here has to match the order of the fields[] initializer below. */
+typedef enum field_id {
+ FIELD_DOMID,
+ FIELD_STATE,
+ FIELD_CPU,
+ FIELD_CPU_PCT,
+ FIELD_MEM,
+ FIELD_MEM_PCT,
+ FIELD_MAXMEM,
+ FIELD_MAX_PCT,
+ FIELD_VCPUS,
+ FIELD_NETS,
+ FIELD_NET_TX,
+ FIELD_NET_RX,
+ FIELD_SSID
+} field_id;
+
+/* Describes one column: its id, header text, print width, the function used
+ * to order two domains by it, and the function that prints a domain's value
+ * for it. */
+typedef struct field {
+ field_id num;
+ const char *header;
+ unsigned int default_width;
+ int (*compare)(xenstat_domain *domain1, xenstat_domain *domain2);
+ void (*print)(xenstat_domain *domain);
+} field;
+
+/* The column table, in display order.  The two percentage columns reuse the
+ * comparison of their absolute counterpart: print_mem_pct and print_max_pct
+ * both divide by the same node total, so the ordering is the same. */
+field fields[] = {
+ { FIELD_DOMID, "DOMID", 5, compare_domid, print_domid },
+ { FIELD_STATE, "STATE", 6, compare_state, print_state },
+ { FIELD_CPU, "CPU(sec)", 10, compare_cpu, print_cpu },
+ { FIELD_CPU_PCT, "CPU(%)", 6, compare_cpu_pct, print_cpu_pct },
+ { FIELD_MEM, "MEM(k)", 10, compare_mem, print_mem },
+ { FIELD_MEM_PCT, "MEM(%)", 6, compare_mem, print_mem_pct },
+ { FIELD_MAXMEM, "MAXMEM(k)", 10, compare_maxmem, print_maxmem },
+ { FIELD_MAX_PCT, "MAXMEM(%)", 9, compare_maxmem, print_max_pct },
+ { FIELD_VCPUS, "VCPUS", 5, compare_vcpus, print_vcpus },
+ { FIELD_NETS, "NETS", 4, compare_nets, print_nets },
+ { FIELD_NET_TX, "NETTX(k)", 8, compare_net_tx, print_net_tx },
+ { FIELD_NET_RX, "NETRX(k)", 8, compare_net_rx, print_net_rx },
+ { FIELD_SSID, "SSID", 4, compare_ssid, print_ssid }
+};
+
+/* Number of entries in fields[] */
+const unsigned int NUM_FIELDS = sizeof(fields)/sizeof(field);
+
+/* Globals */
+/* Time of the current main-loop pass and of the last redraw (see main) */
+struct timeval curtime, oldtime;
+/* Library handle obtained from xenstat_init() in main */
+xenstat_handle *xhandle = NULL;
+/* Previous and current samples; top() rotates them on every redraw */
+xenstat_node *prev_node = NULL;
+xenstat_node *cur_node = NULL;
+/* Column the domain table is sorted by; indexes fields[] */
+field_id sort_field = FIELD_DOMID;
+/* Index (after sorting) of the first domain displayed; changed by the
+ * up/down keys and clamped in top() */
+unsigned int first_domain_index = 0;
+/* Seconds between automatic redraws */
+unsigned int delay = 3;
+/* Display toggles, set from the command line or interactively */
+int show_vcpus = 0;
+int show_networks = 0;
+int repeat_header = 0;
+/* Prompt state: prompt is NULL when no prompt is active; prompt_val holds
+ * the NUL-terminated text typed so far and prompt_val_len its length;
+ * prompt_complete_func is called with prompt_val when Enter is pressed. */
+#define PROMPT_VAL_LEN 80
+char *prompt = NULL;
+char prompt_val[PROMPT_VAL_LEN];
+int prompt_val_len = 0;
+void (*prompt_complete_func)(char *);
+
+/*
+ * Function definitions
+ */
+
+/* Utility functions */
+
+/* Write the command-line help to stdout; program is shown as the command
+ * name on the "Usage:" line. */
+static void usage(const char *program)
+{
+	/* Everything after the first line is fixed text. */
+	static const char option_help[] =
+		"Displays ongoing information about xen vm resources \n\n"
+		"-h, --help display this help and exit\n"
+		"-V, --version output version information and exit\n"
+		"-d, --delay=SECONDS seconds between updates (default 3)\n"
+		"-n, --networks output vif network data\n"
+		"-r, --repeat-header repeat table header before each domain\n"
+		"-v, --vcpus output vcpu data\n"
+		"\n" XENTOP_BUGSTO;
+
+	printf("Usage: %s [OPTION]\n", program);
+	fputs(option_help, stdout);
+}
+
+/* Write the program name, version, authors and disclaimer to stdout */
+static void version(void)
+{
+	static const char banner[] =
+		"xentop " XENTOP_VERSION "\n"
+		"Written by Judy Fischbach, David Hendricks, Josh Triplett\n"
+		"\n" XENTOP_DISCLAIMER;
+
+	fputs(banner, stdout);
+}
+
+/* Exit handler (registered with atexit in main): leaves curses mode if it
+ * is still active, then frees both samples and the library handle. */
+static void cleanup(void)
+{
+	if (!isendwin())
+		endwin();
+
+	if (prev_node)
+		xenstat_free_node(prev_node);
+	if (cur_node)
+		xenstat_free_node(cur_node);
+
+	if (xhandle)
+		xenstat_uninit(xhandle);
+}
+
+/* Leave curses mode if it is active, write the message to stderr and exit
+ * with status 1.  The message is written verbatim and is never interpreted
+ * as a printf format, so a '%' in str cannot misbehave. */
+static void fail(const char *str)
+{
+	if(!isendwin())
+		endwin();
+	fputs(str, stderr);
+	exit(1);
+}
+
+/* Return the row of stdscr that the cursor is on. */
+static int current_row(void)
+{
+	int row, col;
+
+	getyx(stdscr, row, col);
+	(void)col;	/* getyx always fills both; only the row is wanted */
+	return row;
+}
+
+/* Return the height of stdscr in lines. */
+static int lines(void)
+{
+	int height, width;
+
+	getmaxyx(stdscr, height, width);
+	(void)width;	/* getmaxyx always fills both; only the height is wanted */
+	return height;
+}
+
+/* printf-style output to stdscr.  Nothing is written once the cursor has
+ * reached the last screen line. */
+static void print(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (current_row() >= lines() - 1)
+		return;
+
+	va_start(ap, fmt);
+	vw_printw(stdscr, fmt, ap);
+	va_end(ap);
+}
+
+/* Write str at the cursor with the curses attribute(s) in attr switched on,
+ * then switch them off again.  An attr of 0 writes the string with whatever
+ * attributes are already in effect. */
+static void attr_addstr(int attr, const char *str)
+{
+ attron(attr);
+ addstr(str);
+ attroff(attr);
+}
+
+/* Prompt-completion callback for the delay prompt: parse value with atoi()
+ * and use it as the new delay.  Values that do not parse to a positive
+ * number leave the delay unchanged. */
+static void set_delay(char *value)
+{
+	int secs = atoi(value);
+
+	if (secs <= 0)
+		return;
+	delay = secs;
+}
+
+/* Start prompting with the given prompt string and remember the function to
+ * call when a value is available.  The typed value is reset to empty.
+ * Passing NULL as new_prompt ends prompting mode. */
+static void set_prompt(char *new_prompt, void (*func)(char *))
+{
+	/* Discard anything typed at an earlier prompt. */
+	prompt_val_len = 0;
+	prompt_val[0] = '\0';
+
+	prompt_complete_func = func;
+	prompt = new_prompt;
+}
+
+/* Handle one key code returned by getch().  Returns 0 if the program should
+ * quit, or 1 if not.  While a prompt is active the key edits the prompt
+ * value; otherwise it is treated as an interactive command.  Keys with no
+ * meaning in the current mode are ignored. */
+static int handle_key(int ch)
+{
+	if(prompt == NULL) {
+		/* Not prompting for input; handle interactive commands */
+		switch(ch) {
+		case 'n': case 'N':
+			show_networks ^= 1;
+			break;
+		case 'r': case 'R':
+			repeat_header ^= 1;
+			break;
+		case 's': case 'S':
+			/* Cycle to the next column, wrapping at the end */
+			sort_field = (sort_field + 1) % NUM_FIELDS;
+			break;
+		case 'v': case 'V':
+			show_vcpus ^= 1;
+			break;
+		case KEY_DOWN:
+			/* top() clamps this to the last domain */
+			first_domain_index++;
+			break;
+		case KEY_UP:
+			if(first_domain_index > 0)
+				first_domain_index--;
+			break;
+		case 'd': case 'D':
+			set_prompt("Delay(sec)", set_delay);
+			break;
+		case 'q': case 'Q': case KEY_ESCAPE:
+			return 0;
+		}
+	} else {
+		/* Prompting for input; handle line editing */
+		switch(ch) {
+		case '\r':
+			prompt_complete_func(prompt_val);
+			set_prompt(NULL, NULL);
+			break;
+		case KEY_ESCAPE:
+			set_prompt(NULL, NULL);
+			break;
+		case KEY_BACKSPACE:
+			if(prompt_val_len > 0)
+				prompt_val[--prompt_val_len] = '\0';
+			/* Must not fall through into the insert path */
+			break;
+		default:
+			/* isprint() is only defined for values that fit in
+			 * an unsigned char (and EOF), so curses function-key
+			 * codes and ERR are filtered out before calling it. */
+			if(ch >= 0 && ch <= 0xFF
+			   && (prompt_val_len+1) < PROMPT_VAL_LEN
+			   && isprint(ch)) {
+				prompt_val[prompt_val_len++] = (char)ch;
+				prompt_val[prompt_val_len] = '\0';
+			}
+			break;
+		}
+	}
+
+	return 1;
+}
+
+/* Three-way comparison of two unsigned values: -1 if i1 < i2, 0 if they
+ * are equal, 1 if i1 > i2. */
+static int compare(unsigned long long i1, unsigned long long i2)
+{
+	/* Each relational test yields 0 or 1, so the difference is the sign. */
+	return (i1 > i2) - (i1 < i2);
+}
+
+/* Comparison callback handed to qsort() in top().  Each argument points at
+ * an element of the domain pointer array; the two domains are ordered by the
+ * comparison function of the current sort column. */
+static int compare_domains(xenstat_domain **domain1, xenstat_domain **domain2)
+{
+	const field *sort_column = &fields[sort_field];
+
+	return sort_column->compare(*domain1, *domain2);
+}
+
+/* Field functions */
+
+/* Orders two domains by domain ID: -1, 0 or 1 when the first ID is lower
+ * than, equal to or higher than the second. */
+int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned id1 = xenstat_domain_id(domain1);
+	unsigned id2 = xenstat_domain_id(domain2);
+
+	return compare(id1, id2);
+}
+
+/* Prints the domain ID, right-aligned in 5 columns */
+void print_domid(xenstat_domain *domain)
+{
+	unsigned id = xenstat_domain_id(domain);
+
+	print("%5u", id);
+}
+
+/* One entry per domain state: the libxenstat query for that state and the
+ * letter print_state shows when the query returns nonzero.  The order of
+ * the entries is both the display order of the letters and the priority
+ * order used by compare_state. */
+struct {
+ unsigned int (*get)(xenstat_domain *);
+ char ch;
+} state_funcs[] = {
+ { xenstat_domain_dying, 'd' },
+ { xenstat_domain_shutdown, 's' },
+ { xenstat_domain_blocked, 'b' },
+ { xenstat_domain_crashed, 'c' },
+ { xenstat_domain_paused, 'p' },
+ { xenstat_domain_running, 'r' }
+};
+/* Number of entries in state_funcs[] */
+const unsigned int NUM_STATES = sizeof(state_funcs)/sizeof(*state_funcs);
+
+/* Orders two domains by state.  The states are checked in state_funcs[]
+ * order; at the first state that only one of the domains is in, that domain
+ * sorts first (-1 if it is domain1, 1 if it is domain2).  Returns 0 when the
+ * domains agree on every state. */
+static int compare_state(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned int s;
+
+	for (s = 0; s < NUM_STATES; s++) {
+		int in1 = state_funcs[s].get(domain1) != 0;
+		int in2 = state_funcs[s].get(domain2) != 0;
+
+		if (in1 != in2)
+			return in1 ? -1 : 1;
+	}
+	return 0;
+}
+
+/* Prints one character per entry of state_funcs[]: the state's letter if
+ * the domain is in that state, '-' otherwise. */
+static void print_state(xenstat_domain *domain)
+{
+	unsigned int s;
+
+	for (s = 0; s < NUM_STATES; s++) {
+		char flag = '-';
+
+		if (state_funcs[s].get(domain))
+			flag = state_funcs[s].ch;
+		print("%c", flag);
+	}
+}
+
+/* Orders two domains by total CPU time used.  The result of compare() is
+ * negated, so the domain with more CPU time sorts first. */
+static int compare_cpu(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned long long ns1 = xenstat_domain_cpu_ns(domain1);
+	unsigned long long ns2 = xenstat_domain_cpu_ns(domain2);
+
+	return -compare(ns1, ns2);
+}
+
+/* Prints the domain's CPU time in whole seconds (nanoseconds / 10^9) */
+static void print_cpu(xenstat_domain *domain)
+{
+	unsigned long long secs = xenstat_domain_cpu_ns(domain)/1000000000;
+
+	print("%10llu", secs);
+}
+
+/* Computes the CPU time a domain used between the previous sample and the
+ * current one, as a percentage of the time elapsed between oldtime and
+ * curtime.  Returns 0.0 when there is no previous sample, when the domain
+ * was not in it, or when no positive amount of time has elapsed. */
+static double get_cpu_pct(xenstat_domain *domain)
+{
+	xenstat_domain *old_domain;
+	double us_elapsed;
+
+	/* Can't calculate CPU percentage without a previous sample. */
+	if(prev_node == NULL)
+		return 0.0;
+
+	old_domain = xenstat_node_domain(prev_node, xenstat_domain_id(domain));
+	if(old_domain == NULL)
+		return 0.0;
+
+	/* Calculate the time elapsed in microseconds */
+	us_elapsed = ((curtime.tv_sec-oldtime.tv_sec)*1000000.0
+		      +(curtime.tv_usec - oldtime.tv_usec));
+
+	/* Two redraws can carry the same timestamp (a key press forces an
+	 * immediate redraw) and the wall clock can be stepped backwards;
+	 * neither case gives a usable interval to divide by. */
+	if(us_elapsed <= 0.0)
+		return 0.0;
+
+	/* Nanoseconds divided by 1000.0 gives microseconds, and the ratio
+	 * multiplied by 100.0 gives a percentage; combined, that is a
+	 * division by 10.0. */
+	return ((xenstat_domain_cpu_ns(domain)
+		 -xenstat_domain_cpu_ns(old_domain))/10.0)/us_elapsed;
+}
+
+/* Orders two domains by CPU percentage, highest first: -1 if domain1 used
+ * more, 1 if domain2 used more, 0 if equal. */
+static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	double pct1 = get_cpu_pct(domain1);
+	double pct2 = get_cpu_pct(domain2);
+
+	/* Compared as doubles on purpose: compare() takes unsigned long
+	 * long, which would drop the fractional part and make e.g. 0.2%
+	 * and 0.9% sort as equal. */
+	if(pct1 > pct2)
+		return -1;
+	if(pct1 < pct2)
+		return 1;
+	return 0;
+}
+
+/* Prints the domain's CPU percentage with one decimal place */
+static void print_cpu_pct(xenstat_domain *domain)
+{
+	double pct = get_cpu_pct(domain);
+
+	print("%6.1f", pct);
+}
+
+/* Orders two domains by current memory.  The result of compare() is
+ * negated, so the domain with more memory sorts first. */
+static int compare_mem(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned long long mem1 = xenstat_domain_cur_mem(domain1);
+	unsigned long long mem2 = xenstat_domain_cur_mem(domain2);
+
+	return -compare(mem1, mem2);
+}
+
+/* Prints the domain's current memory divided by 1024 (the "MEM(k)" column) */
+static void print_mem(xenstat_domain *domain)
+{
+	unsigned long long mem_k = xenstat_domain_cur_mem(domain)/1024;
+
+	print("%10llu", mem_k);
+}
+
+/* Prints the domain's current memory as a percentage of the total memory
+ * of the current node sample */
+static void print_mem_pct(xenstat_domain *domain)
+{
+	double domain_mem = (double)xenstat_domain_cur_mem(domain);
+	double node_mem = (double)xenstat_node_tot_mem(cur_node);
+
+	print("%6.1f", domain_mem / node_mem * 100);
+}
+
+/* Orders two domains by maximum memory.  The result of compare() is
+ * negated, so the domain with the larger maximum sorts first. */
+static int compare_maxmem(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned long long max1 = xenstat_domain_max_mem(domain1);
+	unsigned long long max2 = xenstat_domain_max_mem(domain2);
+
+	return -compare(max1, max2);
+}
+
+/* Prints the domain's maximum memory divided by 1024, or "no limit" when
+ * the maximum is reported as all-ones ((unsigned long long)-1) */
+static void print_maxmem(xenstat_domain *domain)
+{
+	const unsigned long long unlimited = (unsigned long long)-1;
+	unsigned long long max_mem = xenstat_domain_max_mem(domain);
+
+	if (max_mem != unlimited)
+		print("%10llu", max_mem/1024);
+	else
+		print("%10s", "no limit");
+}
+
+/* Prints the domain's maximum memory as a percentage of the total memory
+ * of the current node sample, or "n/a" when the maximum is reported as
+ * all-ones ((unsigned long long)-1) */
+static void print_max_pct(xenstat_domain *domain)
+{
+	unsigned long long max_mem = xenstat_domain_max_mem(domain);
+
+	if (max_mem == (unsigned long long)-1) {
+		print("%9s", "n/a");
+		return;
+	}
+
+	print("%9.1f", (double)max_mem /
+	      (double)xenstat_node_tot_mem(cur_node) * 100);
+}
+
+/* Orders two domains by number of virtual CPUs.  The result of compare()
+ * is negated, so the domain with more VCPUs sorts first. */
+static int compare_vcpus(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned int vcpus1 = xenstat_domain_num_vcpus(domain1);
+	unsigned int vcpus2 = xenstat_domain_num_vcpus(domain2);
+
+	return -compare(vcpus1, vcpus2);
+}
+
+/* Prints the domain's number of virtual CPUs */
+static void print_vcpus(xenstat_domain *domain)
+{
+	unsigned int vcpus = xenstat_domain_num_vcpus(domain);
+
+	print("%5u", vcpus);
+}
+
+/* Orders two domains by number of virtual networks.  The result of
+ * compare() is negated, so the domain with more networks sorts first. */
+static int compare_nets(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned int nets1 = xenstat_domain_num_networks(domain1);
+	unsigned int nets2 = xenstat_domain_num_networks(domain2);
+
+	return -compare(nets1, nets2);
+}
+
+/* Prints the domain's number of virtual networks */
+static void print_nets(xenstat_domain *domain)
+{
+	unsigned int nets = xenstat_domain_num_networks(domain);
+
+	print("%4u", nets);
+}
+
+/* Orders two domains by total transmitted bytes over all their networks.
+ * The result of compare() is negated, so the domain that transmitted more
+ * sorts first. */
+static int compare_net_tx(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned long long tx1 = tot_net_bytes(domain1, FALSE);
+	unsigned long long tx2 = tot_net_bytes(domain2, FALSE);
+
+	return -compare(tx1, tx2);
+}
+
+/* Prints the domain's total transmitted bytes divided by 1024 */
+static void print_net_tx(xenstat_domain *domain)
+{
+	unsigned long long tx_k = tot_net_bytes(domain, FALSE)/1024;
+
+	print("%8llu", tx_k);
+}
+
+/* Orders two domains by total received bytes over all their networks.
+ * The result of compare() is negated, so the domain that received more
+ * sorts first. */
+static int compare_net_rx(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned long long rx1 = tot_net_bytes(domain1, TRUE);
+	unsigned long long rx2 = tot_net_bytes(domain2, TRUE);
+
+	return -compare(rx1, rx2);
+}
+
+/* Prints the domain's total received bytes divided by 1024 */
+static void print_net_rx(xenstat_domain *domain)
+{
+	unsigned long long rx_k = tot_net_bytes(domain, TRUE)/1024;
+
+	print("%8llu", rx_k);
+}
+
+/* Sums a byte counter over all of a domain's networks: the receive counter
+ * when rx_flag is nonzero, the transmit counter otherwise. */
+static unsigned long long tot_net_bytes(xenstat_domain *domain, int rx_flag)
+{
+	unsigned long long sum = 0;
+	unsigned int count = xenstat_domain_num_networks(domain);
+	unsigned int n;
+
+	for (n = 0; n < count; n++) {
+		xenstat_network *net = xenstat_domain_network(domain, n);
+
+		sum += rx_flag ? xenstat_network_rbytes(net)
+			       : xenstat_network_tbytes(net);
+	}
+	return sum;
+}
+
+/* Orders two domains by security ID (ssid): -1, 0 or 1 when the first is
+ * lower than, equal to or higher than the second. */
+static int compare_ssid(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	unsigned int ssid1 = xenstat_domain_ssid(domain1);
+	unsigned int ssid2 = xenstat_domain_ssid(domain2);
+
+	return compare(ssid1, ssid2);
+}
+
+/* Prints the domain's security ID (ssid) */
+static void print_ssid(xenstat_domain *domain)
+{
+	unsigned int ssid = xenstat_domain_ssid(domain);
+
+	print("%4u", ssid);
+}
+
+/* Section printing functions */
+/* Prints the summary shown above the domain table: the current time, the
+ * number of domains in each state, and the node's memory and CPU figures. */
+void do_summary(void)
+{
+#define TIME_STR_LEN 9
+	const char *TIME_STR_FORMAT = "%H:%M:%S";
+	char time_str[TIME_STR_LEN];
+	unsigned n_run = 0, n_block = 0, n_pause = 0;
+	unsigned n_crash = 0, n_dying = 0, n_shutdown = 0;
+	unsigned idx, num_domains;
+	unsigned long long total_mem, free_mem;
+	xenstat_domain *dom;
+
+	/* Program name and the time of this sample */
+	strftime(time_str, TIME_STR_LEN, TIME_STR_FORMAT,
+		 localtime(&curtime.tv_sec));
+	num_domains = xenstat_node_num_domains(cur_node);
+	print("xentop - %s\n", time_str);
+
+	/* Each domain is counted once, under the first state in this chain
+	 * that it reports. */
+	for (idx = 0; idx < num_domains; idx++) {
+		dom = xenstat_node_domain_by_index(cur_node, idx);
+		if (xenstat_domain_running(dom))
+			n_run++;
+		else if (xenstat_domain_blocked(dom))
+			n_block++;
+		else if (xenstat_domain_paused(dom))
+			n_pause++;
+		else if (xenstat_domain_shutdown(dom))
+			n_shutdown++;
+		else if (xenstat_domain_crashed(dom))
+			n_crash++;
+		else if (xenstat_domain_dying(dom))
+			n_dying++;
+	}
+
+	print("%u domains: %u running, %u blocked, %u paused, "
+	      "%u crashed, %u dying, %u shutdown \n",
+	      num_domains, n_run, n_block, n_pause,
+	      n_crash, n_dying, n_shutdown);
+
+	total_mem = xenstat_node_tot_mem(cur_node);
+	free_mem = xenstat_node_free_mem(cur_node);
+
+	/* Node memory (used = total - free) and CPU information */
+	print("Mem: %lluk total, %lluk used, %lluk free "
+	      "CPUs: %u @ %lluMHz\n",
+	      total_mem/1024, (total_mem - free_mem)/1024,
+	      free_mem/1024,
+	      xenstat_node_num_cpus(cur_node),
+	      xenstat_node_cpu_hz(cur_node)/1000000);
+}
+
+/* Prints the header row of the domain table in reverse video, with the
+ * header of the current sort column additionally in bold. */
+void do_header(void)
+{
+	field_id col;
+
+	attron(A_REVERSE);
+	for (col = 0; col < NUM_FIELDS; col++) {
+		int is_sort_col = (col == sort_field);
+
+		/* Columns are separated by a single space */
+		if (col != 0)
+			print(" ");
+
+		if (is_sort_col)
+			attron(A_BOLD);
+		print("%*s", fields[col].default_width, fields[col].header);
+		if (is_sort_col)
+			attroff(A_BOLD);
+	}
+	attroff(A_REVERSE);
+	print("\n");
+}
+
+/* Draws the last screen line: the active prompt followed by the value typed
+ * so far, or, when no prompt is active, the summary of interactive keys. */
+void do_bottom_line(void)
+{
+	move(lines()-1, 2);
+
+	if (prompt != NULL) {
+		printw("%s: %s", prompt, prompt_val);
+		return;
+	}
+
+	/* The command letter of every entry is drawn in reverse video. */
+	addch(A_REVERSE | 'D');
+	addstr("elay ");
+
+	/* The three toggles are drawn with color pair 1 while switched on. */
+	addch(A_REVERSE | 'N');
+	attr_addstr(show_networks ? COLOR_PAIR(1) : 0, "etworks");
+	addstr(" ");
+
+	addch(A_REVERSE | 'V');
+	attr_addstr(show_vcpus ? COLOR_PAIR(1) : 0, "CPUs");
+	addstr(" ");
+
+	addch(A_REVERSE | 'R');
+	attr_addstr(repeat_header ? COLOR_PAIR(1) : 0, "epeat header");
+	addstr(" ");
+
+	addch(A_REVERSE | 'S');
+	addstr("ort order ");
+
+	addch(A_REVERSE | 'Q');
+	addstr("uit ");
+}
+
+/* Prints one table row for a domain: every column of fields[] in order,
+ * separated by single spaces, with the sort column in bold. */
+void do_domain(xenstat_domain *domain)
+{
+	unsigned int col;
+
+	for (col = 0; col < NUM_FIELDS; col++) {
+		int is_sort_col = (col == sort_field);
+
+		if (col != 0)
+			print(" ");
+
+		if (is_sort_col)
+			attron(A_BOLD);
+		fields[col].print(domain);
+		if (is_sort_col)
+			attroff(A_BOLD);
+	}
+	print("\n");
+}
+
+/* Prints the CPU time, in whole seconds, of each of a domain's VCPUs,
+ * starting a new line after every fifth VCPU. */
+void do_vcpu(xenstat_domain *domain)
+{
+	int idx;
+	unsigned count;
+
+	print("VCPUs(sec): ");
+
+	count = xenstat_domain_num_vcpus(domain);
+	for (idx = 0; idx < count; idx++) {
+		xenstat_vcpu *vcpu = xenstat_domain_vcpu(domain, idx);
+
+		/* Break the line before VCPU 5, 10, 15, ... */
+		if (idx != 0 && (idx%5)==0)
+			print("\n ");
+		print(" %2u: %10llus", idx, xenstat_vcpu_ns(vcpu)/1000000000);
+	}
+	print("\n");
+}
+
+/* Prints one line per network of a domain with its receive and transmit
+ * counters: bytes, packets, errors and drops. */
+void do_network(xenstat_domain *domain)
+{
+	int idx;
+	unsigned count = xenstat_domain_num_networks(domain);
+
+	for (idx = 0; idx < count; idx++) {
+		xenstat_network *net = xenstat_domain_network(domain, idx);
+
+		/* Receive half of the line */
+		print("Net%d RX: %8llubytes %8llupkts %8lluerr %8lludrop ",
+		      idx,
+		      xenstat_network_rbytes(net),
+		      xenstat_network_rpackets(net),
+		      xenstat_network_rerrs(net),
+		      xenstat_network_rdrop(net));
+
+		/* Transmit half of the line */
+		print("TX: %8llubytes %8llupkts %8lluerr %8lludrop\n",
+		      xenstat_network_tbytes(net),
+		      xenstat_network_tpackets(net),
+		      xenstat_network_terrs(net),
+		      xenstat_network_tdrop(net));
+	}
+}
+
+/* Takes a fresh sample and redraws the screen: the summary, the domain
+ * table sorted by the current sort column and starting at
+ * first_domain_index, and the bottom line.  Exits through fail() if the
+ * sample cannot be retrieved or the sort array cannot be allocated. */
+static void top(void)
+{
+	xenstat_domain **domains;
+	unsigned int i, num_domains = 0;
+
+	/* Rotate the samples: the old current node becomes the previous one */
+	if (prev_node != NULL)
+		xenstat_free_node(prev_node);
+	prev_node = cur_node;
+	cur_node = xenstat_get_node(xhandle, XENSTAT_ALL);
+	if (cur_node == NULL)
+		fail("Failed to retrieve statistics from libxenstat\n");
+
+	/* dump summary top information */
+	do_summary();
+
+	/* Count the number of domains for which to report data */
+	num_domains = xenstat_node_num_domains(cur_node);
+
+	if (num_domains == 0) {
+		/* Nothing to list.  Returning here avoids malloc(0), which
+		 * may legitimately return NULL, and avoids wrapping
+		 * first_domain_index around to UINT_MAX in the clamp below. */
+		first_domain_index = 0;
+		do_bottom_line();
+		return;
+	}
+
+	domains = malloc(num_domains*sizeof(xenstat_domain *));
+	if(domains == NULL)
+		fail("Failed to allocate memory\n");
+
+	for (i=0; i < num_domains; i++)
+		domains[i] = xenstat_node_domain_by_index(cur_node, i);
+
+	/* Sort */
+	qsort(domains, num_domains, sizeof(xenstat_domain *),
+	      (int(*)(const void *, const void *))compare_domains);
+
+	/* Scrolling past the end shows just the last domain */
+	if(first_domain_index >= num_domains)
+		first_domain_index = num_domains-1;
+
+	for (i = first_domain_index; i < num_domains; i++) {
+		/* Stop once the table has reached the bottom line */
+		if(current_row() == lines()-1)
+			break;
+		if (i == first_domain_index || repeat_header)
+			do_header();
+		do_domain(domains[i]);
+		if (show_vcpus)
+			do_vcpu(domains[i]);
+		if (show_networks)
+			do_network(domains[i]);
+	}
+
+	/* The sort array is allocated on every call, so release it on every
+	 * call; the domain objects it pointed to are not freed here. */
+	free(domains);
+
+	do_bottom_line();
+}
+
+/* Program entry point: parses the command line, initializes libxenstat and
+ * curses, then loops redrawing the screen until handle_key() asks to quit.
+ * All teardown happens in cleanup(), which is registered with atexit(). */
+int main(int argc, char **argv)
+{
+	/* long_index receives the index of a matched long option; it has its
+	 * own name so it does not shadow getopt's global optind. */
+	int opt, long_index = 0;
+	int ch = ERR;
+
+	struct option lopts[] = {
+		{ "help", no_argument, NULL, 'h' },
+		{ "version", no_argument, NULL, 'V' },
+		{ "networks", no_argument, NULL, 'n' },
+		{ "repeat-header", no_argument, NULL, 'r' },
+		{ "vcpus", no_argument, NULL, 'v' },
+		{ "delay", required_argument, NULL, 'd' },
+		{ 0, 0, 0, 0 },
+	};
+	/* One letter per case handled below, including the documented -r */
+	const char *sopts = "hVnrvd:";
+
+	if (atexit(cleanup) != 0)
+		fail("Failed to install cleanup handler.\n");
+
+	while ((opt = getopt_long(argc, argv, sopts, lopts,
+				  &long_index)) != -1) {
+		switch (opt) {
+		case 'h':
+		case '?':
+		default:
+			usage(argv[0]);
+			exit(0);
+		case 'V':
+			version();
+			exit(0);
+		case 'n':
+			show_networks = 1;
+			break;
+		case 'r':
+			repeat_header = 1;
+			break;
+		case 'v':
+			show_vcpus = 1;
+			break;
+		case 'd':
+			/* Same validation as the interactive delay prompt:
+			 * a value that is not a positive number leaves the
+			 * default delay in place. */
+			set_delay(optarg);
+			break;
+		}
+	}
+
+	/* Get xenstat handle */
+	xhandle = xenstat_init();
+	if (xhandle == NULL)
+		fail("Failed to initialize xenstat library\n");
+
+	/* Begin curses stuff */
+	initscr();
+	start_color();
+	cbreak();
+	noecho();
+	nonl();
+	keypad(stdscr, TRUE);
+	/* getch() gives up after 5 tenths of a second and returns ERR */
+	halfdelay(5);
+	use_default_colors();
+	init_pair(1, -1, COLOR_YELLOW);
+
+	do {
+		gettimeofday(&curtime, NULL);
+		/* Redraw after any key press, or once the delay has passed */
+		if(ch != ERR || (curtime.tv_sec - oldtime.tv_sec) >= delay) {
+			clear();
+			top();
+			oldtime = curtime;
+			refresh();
+		}
+		ch = getch();
+	} while (handle_key(ch));
+
+	/* Cleanup occurs in cleanup(), so no work to do here. */
+
+	return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/COPYING
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/COPYING Thu Aug 25 22:53:20 2005
@@ -0,0 +1,515 @@
+This license (LGPL) applies to the xenstore library which interfaces
+with the xenstore daemon (as stated in xs.c, xs.h, xs_lib.c and
+xs_lib.h). The remaining files in the directory are licensed as
+stated in the comments (as of this writing, GPL, see ../../COPYING).
+
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes a de-facto standard. To achieve this, non-free programs must
+be allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at least
+ three years, to give the same user the materials specified in
+ Subsection 6a, above, for a charge no more than the cost of
+ performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the library,
+if necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James
+ Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/01simple.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/01simple.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,4 @@
+# Create an entry, read it.
+write /test create contents
+expect contents
+read /test
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/02directory.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/02directory.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,47 @@
+# Root directory has only tool dir in it.
+expect tool
+dir /
+
+# Create a file.
+write /test create contents
+
+# Directory shows it.
+expect test
+expect tool
+dir /
+
+# Make a new directory, check it's there
+mkdir /dir
+expect dir
+expect test
+expect tool
+dir /
+
+# Check it's empty.
+dir /dir
+
+# Create a file, check it exists.
+write /dir/test2 create contents2
+expect test2
+dir /dir
+expect contents2
+read /dir/test2
+
+# Creating dir over the top should fail.
+expect mkdir failed: File exists
+mkdir /dir
+expect mkdir failed: File exists
+mkdir /dir/test2
+
+# Mkdir implicitly creates directories.
+mkdir /dir/1/2/3/4
+expect test2
+expect 1
+dir /dir
+expect 2
+dir /dir/1
+expect 3
+dir /dir/1/2
+expect 4
+dir /dir/1/2/3
+dir /dir/1/2/3/4
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/03write.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/03write.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,39 @@
+# Write without create fails.
+expect write failed: No such file or directory
+write /test none contents
+
+# Exclusive write succeeds
+write /test excl contents
+expect contents
+read /test
+
+# Exclusive write fails to overwrite.
+expect write failed: File exists
+write /test excl contents
+
+# Non-exclusive overwrite succeeds.
+write /test none contents2
+expect contents2
+read /test
+write /test create contents3
+expect contents3
+read /test
+
+# Write should implicitly create directories
+write /dir/test create contents
+expect test
+dir /dir
+expect contents
+read /dir/test
+write /dir/1/2/3/4 excl contents4
+expect test
+expect 1
+dir /dir
+expect 2
+dir /dir/1
+expect 3
+dir /dir/1/2
+expect 4
+dir /dir/1/2/3
+expect contents4
+read /dir/1/2/3/4
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/04rm.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/04rm.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,18 @@
+# Removing a non-existent entry fails.
+expect rm failed: No such file or directory
+rm /test
+expect rm failed: No such file or directory
+rm /dir/test
+
+# Create file and remove it
+write /test excl contents
+rm /test
+
+# Create directory and remove it.
+mkdir /dir
+rm /dir
+
+# Create directory, create file, remove all.
+mkdir /dir
+write /dir/test excl contents
+rm /dir
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/05filepermissions.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/05filepermissions.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,81 @@
+# Fail to get perms on non-existent file.
+expect getperm failed: No such file or directory
+getperm /test
+expect getperm failed: No such file or directory
+getperm /dir/test
+
+# Create file: inherits from root (0 READ)
+write /test excl contents
+expect 0 READ
+getperm /test
+setid 1
+expect 0 READ
+getperm /test
+expect contents
+read /test
+expect write failed: Permission denied
+write /test none contents
+
+# Take away read access to file.
+setid 0
+setperm /test 0 NONE
+setid 1
+expect getperm failed: Permission denied
+getperm /test
+expect read failed: Permission denied
+read /test
+expect write failed: Permission denied
+write /test none contents
+
+# Grant everyone write access to file.
+setid 0
+setperm /test 0 WRITE
+setid 1
+expect getperm failed: Permission denied
+getperm /test
+expect read failed: Permission denied
+read /test
+write /test none contents2
+setid 0
+expect contents2
+read /test
+
+# Grant everyone both read and write access.
+setperm /test 0 READ/WRITE
+setid 1
+expect 0 READ/WRITE
+getperm /test
+expect contents2
+read /test
+write /test none contents3
+expect contents3
+read /test
+
+# Change so that user 1 owns it; no one else can do anything.
+setid 0
+setperm /test 1 NONE
+setid 1
+expect 1 NONE
+getperm /test
+expect contents3
+read /test
+write /test none contents4
+
+# User 2 can do nothing.
+setid 2
+expect setperm failed: Permission denied
+setperm /test 2 NONE
+expect getperm failed: Permission denied
+getperm /test
+expect read failed: Permission denied
+read /test
+expect write failed: Permission denied
+write /test none contents4
+
+# Tools can always access things.
+setid 0
+expect 1 NONE
+getperm /test
+expect contents4
+read /test
+write /test none contents5
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/06dirpermissions.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/06dirpermissions.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,127 @@
+# Root directory: owned by tool, everyone has read access.
+expect 0 READ
+getperm /
+
+# Create directory: inherits from root.
+mkdir /dir
+expect 0 READ
+getperm /dir
+setid 1
+expect 0 READ
+getperm /dir
+dir /dir
+expect write failed: Permission denied
+write /dir/test create contents2
+
+# Remove everyone's read access to directory.
+setid 0
+setperm /dir 0 NONE
+setid 1
+expect dir failed: Permission denied
+dir /dir
+expect read failed: Permission denied
+read /dir/test create contents2
+expect write failed: Permission denied
+write /dir/test create contents2
+
+# Grant everyone write access to directory.
+setid 0
+setperm /dir 0 WRITE
+setid 1
+expect getperm failed: Permission denied
+getperm /dir
+expect dir failed: Permission denied
+dir /dir
+write /dir/test create contents
+setid 0
+expect 1 WRITE
+getperm /dir/test
+setperm /dir/test 0 NONE
+expect contents
+read /dir/test
+
+# Grant everyone both read and write access.
+setperm /dir 0 READ/WRITE
+setid 1
+expect 0 READ/WRITE
+getperm /dir
+expect test
+dir /dir
+write /dir/test2 create contents
+expect contents
+read /dir/test2
+setperm /dir/test2 1 NONE
+
+# Change so that user 1 owns it, no one else can do anything.
+setid 0
+setperm /dir 1 NONE
+expect 1 NONE
+getperm /dir
+expect test
+expect test2
+dir /dir
+write /dir/test3 create contents
+
+# User 2 can do nothing. Can't even tell if file exists.
+setid 2
+expect setperm failed: Permission denied
+setperm /dir 2 NONE
+expect getperm failed: Permission denied
+getperm /dir
+expect dir failed: Permission denied
+dir /dir
+expect read failed: Permission denied
+read /dir/test
+expect read failed: Permission denied
+read /dir/test2
+expect read failed: Permission denied
+read /dir/test3
+expect read failed: Permission denied
+read /dir/test4
+expect write failed: Permission denied
+write /dir/test none contents
+expect write failed: Permission denied
+write /dir/test create contents
+expect write failed: Permission denied
+write /dir/test excl contents
+expect write failed: Permission denied
+write /dir/test4 none contents
+expect write failed: Permission denied
+write /dir/test4 create contents
+expect write failed: Permission denied
+write /dir/test4 excl contents
+
+# Tools can always access things.
+setid 0
+expect 1 NONE
+getperm /dir
+expect test
+expect test2
+expect test3
+dir /dir
+write /dir/test4 create contents
+
+# Inherited by child.
+mkdir /dir/subdir
+expect 1 NONE
+getperm /dir/subdir
+write /dir/subfile excl contents
+expect 1 NONE
+getperm /dir/subfile
+
+# But for domains, they own it.
+setperm /dir/subdir 2 READ/WRITE
+expect 2 READ/WRITE
+getperm /dir/subdir
+setid 3
+write /dir/subdir/subfile excl contents
+expect 3 READ/WRITE
+getperm /dir/subdir/subfile
+
+# Inheritance works through multiple directories, too.
+write /dir/subdir/1/2/3/4 excl contents
+expect 3 READ/WRITE
+getperm /dir/subdir/1/2/3/4
+mkdir /dir/subdir/a/b/c/d
+expect 3 READ/WRITE
+getperm /dir/subdir/a/b/c/d
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/07watch.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/07watch.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,194 @@
+# Watch something, write to it, check watch has fired.
+write /test create contents
+
+1 watch /test token
+2 write /test create contents2
+expect 1:/test:token
+1 waitwatch
+1 ackwatch token
+1 close
+
+# Check that reads don't set it off.
+1 watch /test token
+expect 2:contents2
+2 read /test
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+# mkdir, setperm and rm should set it off (also tests watching dirs)
+mkdir /dir
+1 watch /dir token
+2 mkdir /dir/newdir
+expect 1:/dir/newdir:token
+1 waitwatch
+1 ackwatch token
+2 setperm /dir/newdir 0 READ
+expect 1:/dir/newdir:token
+1 waitwatch
+1 ackwatch token
+2 rm /dir/newdir
+expect 1:/dir/newdir:token
+1 waitwatch
+1 ackwatch token
+1 close
+2 close
+
+# We don't get a watch from our own commands.
+watch /dir token
+mkdir /dir/newdir
+expect waitwatch failed: Connection timed out
+waitwatch
+close
+
+# ignore watches while doing commands, should work.
+watch /dir token
+1 write /dir/test create contents
+expect contents
+read /dir/test
+expect /dir/test:token
+waitwatch
+ackwatch token
+close
+
+# watch priority test: all simultaneous
+1 watch /dir token1
+3 watch /dir token3
+2 watch /dir token2
+write /dir/test create contents
+expect 3:/dir/test:token3
+3 waitwatch
+3 ackwatch token3
+expect 2:/dir/test:token2
+2 waitwatch
+2 ackwatch token2
+expect 1:/dir/test:token1
+1 waitwatch
+1 ackwatch token1
+1 close
+2 close
+3 close
+
+# If one dies (without acking), the other should still get ack.
+1 watch /dir token1
+2 watch /dir token2
+write /dir/test create contents
+expect 2:/dir/test:token2
+2 waitwatch
+2 close
+expect 1:/dir/test:token1
+1 waitwatch
+1 ackwatch token1
+1 close
+
+# If one dies (without reading at all), the other should still get ack.
+1 watch /dir token1
+2 watch /dir token2
+write /dir/test create contents
+2 close
+expect 1:/dir/test:token1
+1 waitwatch
+1 ackwatch token1
+1 close
+2 close
+
+# unwatch
+1 watch /dir token1
+1 unwatch /dir token1
+1 watch /dir token2
+2 write /dir/test2 create contents
+expect 1:/dir/test2:token2
+1 waitwatch
+1 unwatch /dir token2
+1 close
+2 close
+
+# unwatch while watch pending. Other watcher still gets the event.
+1 watch /dir token1
+2 watch /dir token2
+write /dir/test create contents
+2 unwatch /dir token2
+expect 1:/dir/test:token1
+1 waitwatch
+1 ackwatch token1
+1 close
+2 close
+
+# unwatch while watch pending. Should clear this so we get next event.
+1 watch /dir token1
+write /dir/test create contents
+1 unwatch /dir token1
+1 watch /dir/test token2
+write /dir/test none contents2
+expect 1:/dir/test:token2
+1 waitwatch
+1 ackwatch token2
+
+# check we only get notified once.
+1 watch /test token
+2 write /test create contents2
+expect 1:/test:token
+1 waitwatch
+1 ackwatch token
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+# watches are queued in order.
+1 watch / token
+2 write /test1 create contents
+2 write /test2 create contents
+2 write /test3 create contents
+expect 1:/test1:token
+1 waitwatch
+1 ackwatch token
+expect 1:/test2:token
+1 waitwatch
+1 ackwatch token
+expect 1:/test3:token
+1 waitwatch
+1 ackwatch token
+1 close
+
+# Creation of subpaths should be covered correctly.
+1 watch / token
+2 write /test/subnode create contents2
+2 write /test/subnode/subnode create contents2
+expect 1:/test/subnode:token
+1 waitwatch
+1 ackwatch token
+expect 1:/test/subnode/subnode:token
+1 waitwatch
+1 ackwatch token
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+# Watch event must have happened before we registered interest.
+1 watch / token
+2 write /test/subnode create contents2
+1 watch / token2 0
+expect 1:/test/subnode:token
+1 waitwatch
+1 ackwatch token
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+# Rm fires notification on child.
+1 watch /test/subnode token
+2 rm /test
+expect 1:/test/subnode:token
+1 waitwatch
+1 ackwatch token
+
+# Watch should not double-send after we ack, even if we did something in between.
+1 watch /test2 token
+2 write /test2/foo create contents2
+expect 1:/test2/foo:token
+1 waitwatch
+expect 1:contents2
+1 read /test2/foo
+1 ackwatch token
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/08transaction.slowtest
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/08transaction.slowtest Thu Aug 25 22:53:20 2005
@@ -0,0 +1,21 @@
+# Test transaction timeouts. Take a second each.
+
+mkdir /test
+write /test/entry1 create contents
+
+# Transactions can take as long as they want...
+start /test
+sleep 1100
+rm /test/entry1
+commit
+dir /test
+
+# ... as long as no one is waiting.
+1 start /test
+notimeout
+2 mkdir /test/dir
+1 mkdir /test/dir
+expect 1:dir
+1 dir /test
+expect 1: commit failed: Connection timed out
+1 commit
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/08transaction.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/08transaction.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,96 @@
+# Test transactions.
+
+mkdir /test
+
+# Simple transaction: create a file inside transaction.
+1 start /test
+1 write /test/entry1 create contents
+2 dir /test
+expect 1:entry1
+1 dir /test
+1 commit
+expect 2:contents
+2 read /test/entry1
+
+rm /test/entry1
+
+# Create a file and abort transaction.
+1 start /test
+1 write /test/entry1 create contents
+2 dir /test
+expect 1:entry1
+1 dir /test
+1 abort
+2 dir /test
+
+write /test/entry1 create contents
+# Delete in transaction, commit
+1 start /test
+1 rm /test/entry1
+expect 2:entry1
+2 dir /test
+1 dir /test
+1 commit
+2 dir /test
+
+# Delete in transaction, abort.
+write /test/entry1 create contents
+1 start /test
+1 rm /test/entry1
+expect 2:entry1
+2 dir /test
+1 dir /test
+1 abort
+expect 2:entry1
+2 dir /test
+
+# Events inside transactions don't trigger watches until (successful) commit.
+mkdir /test/dir
+1 watch /test token
+2 start /test
+2 mkdir /test/dir/sub
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+2 close
+1 close
+
+1 watch /test token
+2 start /test
+2 mkdir /test/dir/sub
+2 abort
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+1 watch /test token
+2 start /test
+2 mkdir /test/dir/sub
+2 commit
+expect 1:/test/dir/sub:token
+1 waitwatch
+1 ackwatch token
+1 close
+
+# Rm inside transaction works like rm outside: children get notified.
+1 watch /test/dir/sub token
+2 start /test
+2 rm /test/dir
+2 commit
+expect 1:/test/dir/sub:token
+1 waitwatch
+1 ackwatch token
+1 close
+
+# Multiple events from single transaction don't trigger assert
+1 watch /test token
+2 start /test
+2 write /test/1 create contents
+2 write /test/2 create contents
+2 commit
+expect 1:/test/1:token
+1 waitwatch
+1 ackwatch token
+expect 1:/test/2:token
+1 waitwatch
+1 ackwatch token
+1 close
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/09domain.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/09domain.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,19 @@
+# Test domain communication.
+
+# Create a domain, write an entry.
+expect handle is 1
+introduce 1 100 7 /my/home
+1 write /entry1 create contents
+expect entry1
+expect tool
+dir /
+close
+
+# Release that domain.
+release 1
+close
+
+# Introduce and release by same connection.
+expect handle is 2
+introduce 1 100 7 /my/home
+release 1
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/10domain-homedir.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/10domain-homedir.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,19 @@
+# Test domain "implicit" paths.
+
+# Create a domain, write an entry using implicit path, read it back using the absolute path.
+mkdir /home
+expect handle is 1
+introduce 1 100 7 /home
+1 write entry1 create contents
+expect contents
+read /home/entry1
+expect entry1
+dir /home
+
+# Place a watch using a relative path: expect relative answer.
+1 mkdir foo
+1 watch foo token
+write /home/foo/bar create contents
+expect 1:foo/bar:token
+1 waitwatch
+1 ackwatch token
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/11domain-watch.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/11domain-watch.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,52 @@
+# Test watching from a domain.
+
+# Watch something, write to it, check watch has fired.
+write /test create contents
+mkdir /dir
+
+expect handle is 1
+introduce 1 100 7 /my/home
+1 watch /test token
+write /test create contents2
+expect 1:/test:token
+1 waitwatch
+1 ackwatch token
+1 unwatch /test token
+release 1
+1 close
+
+# ignore watches while doing commands, should work.
+expect handle is 1
+introduce 1 100 7 /my/home
+1 watch /dir token
+write /dir/test create contents
+1 write /dir/test2 create contents2
+1 write /dir/test3 create contents3
+1 write /dir/test4 create contents4
+expect 1:/dir/test:token
+1 waitwatch
+1 ackwatch token
+release 1
+1 close
+
+# unwatch
+expect handle is 1
+introduce 1 100 7 /my/home
+1 watch /dir token1
+1 unwatch /dir token1
+1 watch /dir token2
+write /dir/test2 create contents
+expect 1:/dir/test2:token2
+1 waitwatch
+1 unwatch /dir token2
+release 1
+1 close
+
+# unwatch while watch pending.
+expect handle is 1
+introduce 1 100 7 /my/home
+1 watch /dir token1
+write /dir/test2 create contents
+1 unwatch /dir token1
+release 1
+1 close
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/12readonly.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/12readonly.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,41 @@
+# Test that read only connection can't alter store.
+
+write /test create contents
+
+readonly
+expect test
+expect tool
+dir /
+
+expect contents
+read /test
+expect 0 READ
+getperm /test
+watch /test token
+unwatch /test token
+start /
+commit
+start /
+abort
+
+# These don't work
+expect write failed: Read-only file system
+write /test2 create contents
+expect write failed: Read-only file system
+write /test create contents
+expect setperm failed: Read-only file system
+setperm /test 100 NONE
+expect setperm failed: Read-only file system
+setperm /test 100 NONE
+expect shutdown failed: Read-only file system
+shutdown
+expect introduce failed: Read-only file system
+introduce 1 100 7 /home
+
+# Check that watches work like normal.
+watch / token
+1 readwrite
+1 write /test create contents
+expect /test:token
+waitwatch
+ackwatch token
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/13watch-ack.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/13watch-ack.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,22 @@
+# This demonstrates a bug where an xs_acknowledge_watch returns
+# EINVAL, because the daemon doesn't track what watch event it sent
+# and relies on it being the "first" watch which has an event.
+# Watches firing after the first event is sent out will change this.
+
+# Create three things to watch.
+mkdir /test
+mkdir /test/1
+mkdir /test/2
+mkdir /test/3
+
+# Watch all three, fire event on 2, read watch, fire event on 1 and 3, ack 2.
+1 watch /test/1 token1
+1 watch /test/2 token2
+1 watch /test/3 token3
+2 write /test/2 create contents2
+expect 1:/test/2:token2
+1 waitwatch
+3 write /test/1 create contents1
+4 write /test/3 create contents3
+1 ackwatch token2
+1 close
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/14complexperms.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/14complexperms.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,99 @@
+# We should not be able to tell the difference between a node which
+# doesn't exist, and a node we don't have permission on, if we don't
+# have permission on its directory.
+
+mkdir /dir
+setperm /dir 0 NONE
+
+# First when it doesn't exist
+setid 1
+expect *Permission denied
+dir /dir/file
+expect *Permission denied
+read /dir/file
+expect *Permission denied
+write /dir/file none value
+expect *Permission denied
+write /dir/file create value
+expect *Permission denied
+write /dir/file excl value
+expect write failed: Invalid argument
+write /dir/file crap value
+expect *Permission denied
+mkdir /dir/file
+expect *Permission denied
+rm /dir/file
+expect *Permission denied
+rm /dir
+expect *Permission denied
+getperm /dir/file
+expect *Permission denied
+setperm /dir/file 0 NONE
+watch /dir/file token
+setid 0
+write /dir/file create contents
+rm /dir/file
+setid 1
+expect waitwatch failed: Connection timed out
+waitwatch
+unwatch /dir/file token
+expect *No such file or directory
+unwatch /dir/file token
+expect *Permission denied
+start /dir/file
+expect *No such file or directory
+abort
+expect *Permission denied
+start /dir/file
+expect *No such file or directory
+commit
+expect *Permission denied
+introduce 2 100 7 /dir/file
+
+# Now it exists
+setid 0
+write /dir/file create contents
+
+setid 1
+expect *Permission denied
+dir /dir/file
+expect *Permission denied
+read /dir/file
+expect *Permission denied
+write /dir/file none value
+expect *Permission denied
+write /dir/file create value
+expect *Permission denied
+write /dir/file excl value
+expect write failed: Invalid argument
+write /dir/file crap value
+expect *Permission denied
+mkdir /dir/file
+expect *Permission denied
+rm /dir/file
+expect *Permission denied
+rm /dir
+expect *Permission denied
+getperm /dir/file
+expect *Permission denied
+setperm /dir/file 0 NONE
+watch /dir/file token
+setid 0
+write /dir/file create contents
+rm /dir/file
+setid 1
+expect waitwatch failed: Connection timed out
+waitwatch
+unwatch /dir/file token
+expect *No such file or directory
+unwatch /dir/file token
+expect *Permission denied
+start /dir/file
+expect *No such file or directory
+abort
+expect *Permission denied
+start /dir/file
+expect *No such file or directory
+commit
+expect *Permission denied
+introduce 2 100 7 /dir/file
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/15nowait.test
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/15nowait.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,25 @@
+# If we don't wait for an ack, we can crash daemon as it never expects to be
+# sending out two replies on top of each other.
+noackwrite /1 create 1
+noackwrite /2 create 2
+noackwrite /3 create 3
+noackwrite /4 create 4
+noackwrite /5 create 5
+readack
+readack
+readack
+readack
+readack
+
+expect handle is 1
+introduce 1 100 7 /my/home
+1 noackwrite /1 create 1
+1 noackwrite /2 create 2
+1 noackwrite /3 create 3
+1 noackwrite /4 create 4
+1 noackwrite /5 create 5
+1 readack
+1 readack
+1 readack
+1 readack
+1 readack
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_crashme.c
--- /dev/null Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_crashme.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,413 @@
+/* Code which randomly corrupts bits going to the daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <string.h>
+#include <sys/time.h>
+#include "xs.h"
+#include "talloc.h"
+#include <errno.h>
+#include "xenstored.h"
+
+#define XSTEST
+#define RAND_FREQ 128 /* Gap between corrupted bits is random in [0, RAND_FREQ). */
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 1996 Bob Jenkins (bob_jenkins@xxxxxxxxxxxxxxxx)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
+ * hash(), hash2(), hash3, and mix() are externally useful functions.
+ * Routines to test the hash are included if SELF_TEST is defined.
+ * You can use this free for any purpose. It has no warranty.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@xxxxxxxxxx)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are surely my fault. -DaveM
+ */
+
+/* NOTE: a, b and c are modified in place and each is expanded several times. */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
+
+/* The golden ratio: an arbitrary value */
+#define JHASH_GOLDEN_RATIO 0x9e3779b9
+
+/* The most generic version: hashes `length` bytes starting at `key`,
+ * seeded with `initval`, consuming 12 bytes per mixing round. No
+ * alignment or length assumptions are made about the input key.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c, len;
+ const u8 *k = key;
+
+ len = length;
+ a = b = JHASH_GOLDEN_RATIO;
+ c = initval;
+
+ while (len >= 12) {
+ a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
+ b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
+ c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
+
+ __jhash_mix(a,b,c);
+
+ k += 12;
+ len -= 12;
+ }
+
+ c += length; /* fold in the total byte count */
+ switch (len) { /* 0-11 leftover bytes; every case falls through */
+ case 11: c += ((u32)k[10]<<24);
+ case 10: c += ((u32)k[9]<<16);
+ case 9 : c += ((u32)k[8]<<8);
+ case 8 : b += ((u32)k[7]<<24);
+ case 7 : b += ((u32)k[6]<<16);
+ case 6 : b += ((u32)k[5]<<8);
+ case 5 : b += k[4];
+ case 4 : a += ((u32)k[3]<<24);
+ case 3 : a += ((u32)k[2]<<16);
+ case 2 : a += ((u32)k[1]<<8);
+ case 1 : a += k[0];
+ };
+
+ __jhash_mix(a,b,c);
+
+ return c;
+}
+
+/* A special optimized version that handles 1 or more u32s, seeded with
+ * `initval`. The length parameter here is the number of u32s in the key.
+ */
+static inline u32 jhash2(u32 *k, u32 length, u32 initval)
+{
+ u32 a, b, c, len;
+
+ a = b = JHASH_GOLDEN_RATIO;
+ c = initval;
+ len = length;
+
+ while (len >= 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ __jhash_mix(a, b, c);
+ k += 3; len -= 3;
+ }
+
+ c += length * 4; /* length counts words; fold in length times four */
+
+ switch (len) { /* 0-2 leftover words; cases fall through */
+ case 2 : b += k[1];
+ case 1 : a += k[0];
+ };
+
+ __jhash_mix(a,b,c);
+
+ return c;
+}
+
+
+/* Special ultra-optimized versions that know they are hashing exactly
+ * 3, 2 or 1 word(s); the 2- and 1-word forms pass 0 for the missing words.
+ *
+ * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
+ * done at the end is not done here.
+ */
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += JHASH_GOLDEN_RATIO;
+ b += JHASH_GOLDEN_RATIO;
+ c += initval;
+
+ __jhash_mix(a, b, c);
+
+ return c;
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return jhash_3words(a, b, 0, initval);
+}
+
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+ return jhash_3words(a, 0, 0, initval);
+}
+
+/* Return the next pseudo-random value and advance the caller's seed.
+ *
+ * The word that gets hashed is the seed as it was on entry; the hash's
+ * initval is derived from the already-incremented seed.  The original
+ * one-liner read *state and post-incremented it in two arguments of the
+ * same call, which is unsequenced (undefined behaviour), so the order is
+ * now spelled out.  The multiply is done in u32 so that it wraps instead
+ * of overflowing a signed int.
+ *
+ * state: seed to read and increment; must not be NULL.
+ * Returns: the jhash_1word() of the old seed.
+ */
+static unsigned int get_randomness(int *state)
+{
+	u32 word = (*state)++;
+
+	return jhash_1word(word, (u32)*state * 1103515243);
+}
+
+/* Global seed; main() initialises it from the <seed> argument. */
+static int state;
+
+/* Corrupt a message header by shortening its length field.
+ *
+ * Lengthening headers is pointless: other end will just wait for more
+ * data and timeout. We merely shorten the length.
+ *
+ * output:   receives sizeof(*msg) bytes: a copy of *msg, possibly with a
+ *           smaller len.
+ * msg:      the header about to be sent; not modified.
+ * next_bit: offset, in bits from the start of this header, of the next bit
+ *           due for corruption; on return it is at or beyond the end of
+ *           the header.
+ */
+static void corrupt_header(char *output, const struct xsd_sockmsg *msg,
+			   unsigned int *next_bit)
+{
+	struct xsd_sockmsg newmsg = *msg;
+
+	/* *next_bit counts bits (write_all_choice() subtracts len * CHAR_BIT
+	 * after the write), so compare against the header size in bits.
+	 * Comparing against sizeof(*msg) alone could leave *next_bit inside
+	 * the header, and the caller's subtraction then wrapped the unsigned
+	 * counter so that nothing was corrupted for a very long time. */
+	while (*next_bit < sizeof(*msg) * CHAR_BIT) {
+		if (newmsg.len)
+			newmsg.len = get_randomness(&state) % newmsg.len;
+		*next_bit += get_randomness(&state) % RAND_FREQ;
+	}
+	memcpy(output, &newmsg, sizeof(newmsg));
+}
+
+#define read_all_choice read_all
+/* Write `len` bytes of `data` to `fd`, corrupting them on the way.
+ *
+ * A buffer that is exactly header-sized and header-aligned is handed to
+ * corrupt_header(); anything else gets individual bits flipped.  The
+ * static next_bit carries the position of the next bit to corrupt from
+ * one call to the next, relative to the start of the current buffer.
+ * Returns whatever xs_write_all() returns.
+ */
+static bool write_all_choice(int fd, const void *data, unsigned int len)
+{
+	static unsigned int next_bit;
+	const unsigned int nbits = len * CHAR_BIT;
+	char mangled[len];
+	bool looks_like_header, ok;
+
+	looks_like_header = len == sizeof(struct xsd_sockmsg)
+		&& ((unsigned long)data % __alignof__(struct xsd_sockmsg)) == 0;
+
+	if (looks_like_header) {
+		corrupt_header(mangled, data, &next_bit);
+	} else {
+		memcpy(mangled, data, len);
+		/* Flip the due bit, then pick the distance to the next one. */
+		for (; next_bit < nbits;
+		     next_bit += get_randomness(&state) % RAND_FREQ)
+			mangled[next_bit/CHAR_BIT] ^= (1 << (next_bit%CHAR_BIT));
+	}
+
+	ok = xs_write_all(fd, mangled, len);
+	/* Rebase the pending offset onto the start of the next buffer. */
+	next_bit -= nbits;
+	return ok;
+}
+
+#include "xs.c"
+
+/* Build a random store path.
+ *
+ * One time in twenty this is just "/"; otherwise it is one or more
+ * "/<0..14>" components, with a coin flip after each component deciding
+ * whether to add another.  The result is talloc-allocated.
+ */
+static char *random_path(void)
+{
+	char *path = NULL;
+
+	if (!(get_randomness(&state) % 20))
+		return talloc_strdup(NULL, "/");
+
+	/* Always one component; keep going while the coin says so. */
+	do {
+		path = talloc_asprintf_append(path, "/%i",
+					      get_randomness(&state) % 15);
+	} while (get_randomness(&state) % 2);
+
+	return path;
+}
+
+/* Pick write flags: 0, O_CREAT or O_CREAT|O_EXCL with equal probability,
+ * and one time in four a completely random (probably invalid) value. */
+static int random_flags(int *state)
+{
+	static const int sane[] = { 0, O_CREAT, O_CREAT|O_EXCL };
+	unsigned int pick = get_randomness(state) % 4;
+
+	if (pick < 3)
+		return sane[pick];
+	return get_randomness(state);
+}
+
+/* Do the next operation: pick a random path and a random store request
+ * (one of nine kinds) and issue it on handle `h`.  Results are discarded;
+ * replies that come back allocated are freed at once.
+ *
+ * h:       connection to issue the request on.
+ * verbose: when true, print the state counter and a description of the
+ *          request before issuing it.
+ *
+ * The talloc'd path (and, for writes, the talloc'd contents) are now freed
+ * before returning; they used to leak on every iteration.
+ */
+static void do_next_op(struct xs_handle *h, bool verbose)
+{
+	char *name;
+	unsigned int num;
+
+	if (verbose)
+		printf("State %i: ", state);
+
+	name = random_path();
+	switch (get_randomness(&state) % 9) {
+	case 0:
+		if (verbose)
+			printf("DIR %s\n", name);
+		free(xs_directory(h, name, &num));
+		break;
+	case 1:
+		if (verbose)
+			printf("READ %s\n", name);
+		free(xs_read(h, name, &num));
+		break;
+	case 2: {
+		int flags = random_flags(&state);
+		char *contents = talloc_asprintf(NULL, "%i",
+						 get_randomness(&state));
+		/* Send a random-length prefix of the decimal string. */
+		unsigned int len = get_randomness(&state)%(strlen(contents)+1);
+		if (verbose)
+			printf("WRITE %s %s %.*s\n", name,
+			       flags == O_CREAT ? "O_CREAT"
+			       : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL"
+			       : flags == 0 ? "0" : "CRAPFLAGS",
+			       len, contents);
+		xs_write(h, name, contents, len, flags);
+		talloc_free(contents);
+		break;
+	}
+	case 3:
+		if (verbose)
+			printf("MKDIR %s\n", name);
+		xs_mkdir(h, name);
+		break;
+	case 4:
+		if (verbose)
+			printf("RM %s\n", name);
+		xs_rm(h, name);
+		break;
+	case 5:
+		if (verbose)
+			printf("GETPERMS %s\n", name);
+		free(xs_get_permissions(h, name, &num));
+		break;
+	case 6: {
+		/* 0-7 random entries, each with an id in 0-7 and perms in 0-3. */
+		unsigned int i, num = get_randomness(&state)%8;
+		struct xs_permissions perms[num];
+
+		if (verbose)
+			printf("SETPERMS %s: ", name);
+		for (i = 0; i < num; i++) {
+			perms[i].id = get_randomness(&state)%8;
+			perms[i].perms = get_randomness(&state)%4;
+			if (verbose)
+				printf("%i%c ", perms[i].id,
+				       perms[i].perms == XS_PERM_WRITE ? 'W'
+				       : perms[i].perms == XS_PERM_READ ? 'R'
+				       : perms[i].perms ==
+				       (XS_PERM_READ|XS_PERM_WRITE) ? 'B'
+				       : 'N');
+		}
+		if (verbose)
+			printf("\n");
+		xs_set_permissions(h, name, perms, num);
+		break;
+	}
+	case 7: {
+		if (verbose)
+			printf("START %s\n", name);
+		xs_transaction_start(h, name);
+		break;
+	}
+	case 8: {
+		bool abort = (get_randomness(&state) % 2);
+
+		if (verbose)
+			printf("STOP %s\n", abort ? "ABORT" : "COMMIT");
+		xs_transaction_end(h, abort);
+		break;
+	}
+	default:
+		barf("Impossible randomness");
+	}
+
+	/* NOTE(review): assumes no xs_* call above keeps a pointer to the
+	 * path after it returns - confirm against xs.c. */
+	talloc_free(name);
+}
+
+static struct xs_handle *h;
+static void alarmed(int sig __attribute__((unused)))
+{
+ /* SIGALRM handler (main() arms alarm(1) before each op): we force close on timeout. */
+ close(h->fd);
+}
+
+/* Fork and exec ./xenstored_test (under valgrind) and wait until it is up.
+ *
+ * The child's stdout is redirected into a pipe; the child writes its PID
+ * there when it's ready, and the parent blocks until something arrives.
+ *
+ * Returns: the PID returned by fork() for the child (always > 0).
+ * Barfs if the pipe or the fork cannot be created, or if the pipe closes
+ * before the child has written anything.
+ */
+static int start_daemon(void)
+{
+	int fds[2];
+	int daemon_pid;
+	char buffer[20];
+
+	/* Start daemon. */
+	if (pipe(fds) != 0)
+		barf_perror("Creating pipe for daemon");
+
+	daemon_pid = fork();
+	/* A failed fork must not be mistaken for the parent branch: main()
+	 * would go on to kill(-1, SIGTERM), i.e. signal every process it
+	 * is allowed to. */
+	if (daemon_pid < 0)
+		barf_perror("Forking daemon");
+
+	if (daemon_pid == 0) {
+		dup2(fds[1], STDOUT_FILENO);
+		close(fds[0]);
+#if 1
+		execlp("valgrind", "valgrind",
+		       "--log-file=/tmp/xs_crashme.vglog", "-q",
+		       "./xenstored_test", "--output-pid",
+		       "--no-fork", "--trace-file=/tmp/trace", NULL);
+#else
+		execlp("./xenstored_test", "xenstored_test", "--output-pid",
+		       "--no-fork", NULL);
+#endif
+		/* Only reached if exec failed. */
+		exit(1);
+	}
+
+	/* Child writes PID when it's ready: we wait for that.  A read of 0
+	 * bytes means the write end was closed without any output, e.g.
+	 * because the exec failed. */
+	close(fds[1]);
+	if (read(fds[0], buffer, sizeof(buffer)) <= 0)
+		barf("Failed to summon daemon");
+	close(fds[0]);
+	return daemon_pid;
+}
+
+
+/* xs_crashme <iterations> <seed> [pid]
+ *
+ * Issues <iterations> random requests over a deliberately corrupting
+ * connection, checking after each one that the daemon process still
+ * exists.  Without [pid] a test daemon is started first; with it, an
+ * already running daemon with that PID is used.  The daemon is sent
+ * SIGTERM at the end.  Returns 0 if the daemon survived; barfs otherwise.
+ */
+int main(int argc, char **argv)
+{
+	unsigned int i;
+	int pid, iterations, dot_every;
+
+	if (argc != 3 && argc != 4)
+		barf("Usage: xs_crashme <iterations> <seed> [pid]");
+
+	if (argc == 3)
+		pid = start_daemon();
+	else
+		pid = atoi(argv[3]);
+	/* kill() gives pid 0 and negative pids a special meaning (process
+	 * groups, or every process), so never pass those on. */
+	if (pid <= 0)
+		barf("Invalid daemon pid %i", pid);
+
+	/* Parse once instead of on every pass through the loop. */
+	iterations = atoi(argv[1]);
+	/* Print roughly 72 progress dots over the whole run. */
+	dot_every = iterations / 72;
+	if (dot_every == 0)
+		dot_every = 1;
+
+	state = atoi(argv[2]);
+	h = xs_daemon_open();
+	if (!h)
+		barf_perror("Opening connection to daemon");
+	signal(SIGALRM, alarmed);
+	for (i = 0; i < (unsigned)iterations; i++) {
+		/* Give each operation one second before alarmed() fires. */
+		alarm(1);
+		do_next_op(h, false);
+		if (i % dot_every == 0) {
+			printf(".");
+			fflush(stdout);
+		}
+		/* Signal 0 only tests that the process still exists. */
+		if (kill(pid, 0) != 0)
+			barf_perror("Pinging daemon on iteration %u", i);
+		/* A negative fd marks a dead connection: reconnect. */
+		if (h->fd < 0) {
+			xs_daemon_close(h);
+			h = xs_daemon_open();
+			if (!h)
+				barf_perror("Connecting on iteration %u", i);
+		}
+	}
+	kill(pid, SIGTERM);
+	return 0;
+}
+
diff -r 5f1ed597f107 -r 8799d14bef77 docs/misc/shype4xen_readme.txt
--- a/docs/misc/shype4xen_readme.txt Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,588 +0,0 @@
-Copyright: IBM Corporation (C)
-20 June 2005
-Author: Reiner Sailer
-
-This document is a very short introduction into the sHype access control
-security architecture implementation and how it is perceived by users. It
-is a very preliminary draft for the courageous ones to get "their feet wet"
-and to be able to give feedback (via the xen-devel/xense-devel mailing lists).
-
-Install:
-
-cd into xeno-unstable.bk
-(use --dry-run option if you want to test the patch only)
-patch -p1 -g0 < *tools.diff
-patch -p1 -g0 < *xen.diff
-
-(no rejects, probably some line offsets)
-
-make uninstall; make mrproper; make; ./install.sh should install the default
-sHype into Xen (rebuild your initrd images if necessary). Reboot.
-
-Debug output: there are two triggers for debug output:
-a) General sHype debug:
- xeno-unstable.bk/xen/include/public/acm.h
- undefine ACM_DEBUG to switch this debug off
-
-b) sHype enforcement hook trace: This prints a small trace for each enforcement
-hook that is executed. The trigger is in
- xeno-unstable.bk/xen/include/acm/acm_hooks.h
- undefine ACM_TRACE_MODE to switch this debug off
-
-1. The default NULL policy
-***************************
-When you apply the patches and startup xen, you should at first not notice any
-difference because the default policy is the "NULL" policy, which as the name
-implies does not enforce anything.
-
-To display the currently enforced policy, use the policy tool under xeno-
-unstable.bk/tools/policy: policy_tool getpolicy. You should see output like the
-one below.
-
-[root@laptop policy]#./policy_tool getpolicy
-
-Policy dump:
-============
-Magic = 1debc.
-PolVer = aaaa0000.
-Len = 14.
-Primary = NULL policy (c=0, off=14).
-Secondary = NULL policy (c=0, off=14).
-No primary policy (NULL).
-No secondary policy (NULL).
-
-Policy dump End.
-
-Since this is a dump of a binary policy, it's not pretty. The important parts
-are the "Primary" and "Secondary" policy fields set to "NULL policy". sHype
-currently allows to set two independent policies; thus the two SSID-REF parts
-shown in 'xm list'. Right here: primary policy only means this policy is
-checked first, the secondary policy is checked if the primary results in
-"permitted access". The result of the combined policy is "permitted" if both
-policies return permitted (NULL policy always returns permitted). The result is
-"denied" if at least one of the policies returns "denied". Look into xeno-
-unstable.bk/xen/include/acm/acm_hooks.h for the general hook structure
-integrating the policy decisions (if you like, you won't need it for the rest
-of the Readme file).
-
-2. Setting Chinese Wall and Simple Type Enforcement policies:
-*************************************************************
-
-We'll get fast to the point. However, in order to understand what we are doing,
-we must at least understand the purpose of the policies that we are going to
-enforce. The two policies presented here are just examples and the
-implementation encourages adding new policies easily.
-
-2.1. Chinese Wall policy: "decides whether a domain can be started based on
-this domain's ssidref and the ssidrefs of the currently running domains".
-Generally, the Chinese wall policy allows specifying certain types (or classes
-or categories, whatever the preferred word) that conflict; we usually assign a
-type to a workload and the set of types of those workloads running in a domain
-make up the type set for this domain. Each domain is assigned a set of types
-through its SSID-REF (we register Chinese Wall as primary policy, so the
-ssidref used for determining the Chinese Wall types is the one annotated with
-"p:" in xm list) since each SSID-REF points at a set of types. We'll see how
-SSIDREFs are represented in Xen later when we will look at the policy. (A good
-read for Chinese Wall is: Brewer/Nash The Chinese Wall Security Policy 1989.)
-
-So let's assume the Chinese Wall policy we are running distinguishes 10 types:
-t0 ... t9. Let us assume further that each SSID-REF points to a set that
-includes exactly one type (attached to domains that run workloads of a single
-type). SSID-REF 0 points to {t0}, ssidref 1 points to {t1} ... 9 points to
-{t9}. [This is actually the example policy we are going to push into xen later]
-
-Now the Chinese Wall policy allows you to define "Conflict type sets" and it
-guarantees that of any conflict set at most one type is "running" at any time.
-As an example, we have defined 2 conflict set: {t2, t3} and {t0, t5, t6}.
-Specifying these conflict sets, sHype ensures that at most one type of each set
-is running (either t2 or t3 but not both; either t0 or t5 or t6 but not
-multiple of them).
-
-The effect is that administrators can define which workload types cannot run
-simultaneously on a single Xen system. This is useful to limit the covert
-timing channels between such payloads or to ensure that payloads don't
-interfere with each other through existing resource dependencies.
-
-2.2. Simple Type Enforcement (ste) policy: "decides whether two domains can
-share data, e.g., setup event channels or grant tables to each other, based on
-the two domains' ssidref. This, as the name says, is a simple policy. Think of
-each type as of a single color. Each domain has one or more colors, i.e., the
-domains ssid for the ste policy points to a set that has set one or multiple
-types. Let us assume in our example policy we differentiate 5 colors (types)
-and define 5 different ssids referenced by ssidref=0..4. Each ssid shall have
-exactly one type set, i.e., describes a uni-color. Only ssid(0) has all types
-set, i.e., has all defined colors.
-
-Sharing is enforced by the ste policy by requiring that two domains that want
-to establish an event channel or grant pages to each other must have a common
-color. Currently all domains communicate through DOM0 by default; i.e., Domain0
-will necessarily have all colors to be able to create domains (thus, we will
-assign ssidref(0) to Domain0 in our example below.
-
-More complex mandatory access control policies governing sharing will follow;
-such policies are more sophisticated than the "color" scheme above by allowing
-more flexible (and complex :_) access control decisions than "share a color" or
-"don't share a color" and will be able to express finer-grained policies.
-
-
-2.3 Binary Policy:
-In the future, we will have a policy tool that takes as input a more humane
-policy description, using types such as development, home-banking, donated-
-Grid, CorpA-Payload ... and translates the respective policy into what we see
-today as the binary policy using 1s and 0s and sets of them. For now, we must
-live with the binary policy when working with sHype.
-
-
-2.4 Exemplary use of a real sHype policy on Xen. To activate a real policy,
-edit the file (yes, this will soon be a compile option):
- xeno-unstable.bk/xen/include/public/acm.h
- Change: #define ACM_USE_SECURITY_POLICY ACM_NULL_POLICY
- To : #define ACM_USE_SECURITY_POLICY ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
- cd xeno-unstable.bk
- make mrproper
- make uninstall (manually remove /etc/xen.old if necessary)
- make
- ./install.sh (recreate your kernel initrd's if necessary)
- Reboot into new xen.gz
-
-After booting, check out 'xm dmesg'; should show somewhere in the middle:
-
-(XEN) acm_init: Enforcing Primary CHINESE WALL policy, Secondary SIMPLE TYPE
-ENFORCEMENT policy.
-
-Even though you can activate those policies in any combination and also
-independently, the policy tool currently only supports setting the policy for
-the above combination.
-
-Now look at the minimal startup policy with:
- xeno-unstable.bk/tools/policytool getpolicy
-
-You should see something like:
-
-[root@laptop policy]# ./policy_tool getpolicy
-
-Policy dump:
-============
-Magic = 1debc.
-PolVer = aaaa0000.
-Len = 36.
-Primary = CHINESE WALL policy (c=1, off=14).
-Secondary = SIMPLE TYPE ENFORCEMENT policy (c=2, off=2c).
-
-
-Chinese Wall policy:
-====================
-Max Types = 1.
-Max Ssidrefs = 1.
-Max ConfSets = 1.
-Ssidrefs Off = 10.
-Conflicts Off = 12.
-Runing T. Off = 14.
-C. Agg. Off = 16.
-
-SSID To CHWALL-Type matrix:
-
- ssidref 0: 00
-
-Confict Sets:
-
- c-set 0: 00
-
-Running
-Types: 00
-
-Conflict
-Aggregate Set: 00
-
-
-Simple Type Enforcement policy:
-===============================
-Max Types = 1.
-Max Ssidrefs = 1.
-Ssidrefs Off = 8.
-
-SSID To STE-Type matrix:
-
- ssidref 0: 01
-
-
-Policy dump End.
-
-This is a minimal policy (of little use), except it will disable starting any
-domain that does not have ssidref set to 0x0. The Chinese Wall policy has
-nothing to enforce and the ste policy only knows one type, which is set for the
-only defined ssidref.
-
-The item that defines the ssidref in a domain configuration is:
-
-ssidref = 0x12345678
-
-Where ssidref is interpreted as a 32bit number, where the lower 16bits become
-the ssidref for the primary policy and the higher 16bits become the ssidref for
-the secondary policy. sHype currently supports two policies but this is an
-implementation decision and can be extended if necessary.
-
-This reference defines the security information of a domain. The meaning of the
-SSID-REF depends on the policy, so we explain it when we explain the real
-policies.
-
-
-Setting a new Security Policy:
-******************************
-The policy tool with all its current limitations has one usable example policy
-compiled-in. Please try at this time to use the setpolicy command:
- xeno-unstable.bk/tools/policy/policy_tool setpolicy
-
-You should see a dump of the policy you are setting. It should say at the very
-end:
-
-Policy successfully set.
-
-Now try to dump the currently enforced policy, which is the policy we have just
-set and the dynamic security state information of this policy
-(<<< ... some additional explanations)
-
-[root@laptop policy]# ./policy_tool getpolicy
-
-Policy dump:
-============
-Magic = 1debc.
-PolVer = aaaa0000.
-Len = 112.
-Primary = CHINESE WALL policy (c=1, off=14).
-Secondary = SIMPLE TYPE ENFORCEMENT policy (c=2, off=d8).
-
-
-Chinese Wall policy:
-====================
-Max Types = a.
-Max Ssidrefs = 5.
-Max ConfSets = 2.
-Ssidrefs Off = 10.
-Conflicts Off = 74.
-Runing T. Off = 9c.
-C. Agg. Off = b0.
-
-SSID To CHWALL-Type matrix:
-
- ssidref 0: 01 00 00 00 00 00 00 00 00 00 <<< type0 is set for ssidref0
- ssidref 1: 00 01 00 00 00 00 00 00 00 00
- ssidref 2: 00 00 01 00 00 00 00 00 00 00
- ssidref 3: 00 00 00 01 00 00 00 00 00 00
- ssidref 4: 00 00 00 00 01 00 00 00 00 00 <<< type4 is set for ssidref4
- <<< types 5-9 are unused
-Confict Sets:
-
- c-set 0: 00 00 01 01 00 00 00 00 00 00 <<< type2 and type3 never run together
- c-set 1: 01 00 00 00 00 01 01 00 00 00 <<< only one of types 0, 5 or 6
- <<< can run simultaneously
-Running
-Types: 01 00 00 00 00 00 00 00 00 00 <<< ref-count for types of running domains
-
-Conflict
-Aggregate Set: 00 00 00 00 00 01 01 00 00 00 <<< aggregated set of types that
- <<< cannot run because they
- <<< are in conflict set 1 and
- <<< (domain 0 is running w t0)
-
-
-Simple Type Enforcement policy:
-===============================
-Max Types = 5.
-Max Ssidrefs = 5.
-Ssidrefs Off = 8.
-
-SSID To STE-Type matrix:
-
- ssidref 0: 01 01 01 01 01 <<< ssidref0 points to a set that
- <<< has all types set (colors)
- ssidref 1: 00 01 00 00 00 <<< ssidref1 has color1 set
- ssidref 2: 00 00 01 00 00 <<< ...
- ssidref 3: 00 00 00 01 00
- ssidref 4: 00 00 00 00 01
-
-
-Policy dump End.
-
-
-This is a small example policy with which we will demonstrate the enforcement.
-
-Starting Domains with policy enforcement
-========================================
-Now let us play with this policy.
-
-Define 3 or 4 domain configurations. I use the following config using a ramdisk
-only and about 8MBytes of memory for each DomU (test purposes):
-
-#-------configuration xmsec1-------------------------
-kernel = "/boot/vmlinuz-2.6.11-xenU"
-ramdisk="/boot/U1_ramdisk.img"
-#security reference identifier
-ssidref= 0x00010001
-memory = 10
-name = "xmsec1"
-cpu = -1 # leave to Xen to pick
-# Number of network interfaces. Default is 1.
-nics=1
-dhcp="dhcp"
-#-----------------------------------------------------
-
-xmsec2 and xmsec3 look the same except for the name and the ssidref line. Use
-your domain config file and add "ssidref = 0x00010001" to the first (xmsec1),
-"ssidref= 0x00020002" to the second (call it xmsec2), and "ssidref=0x00030003"
-to the third (we will call this one xmsec3).
-
-First start xmsec1: xm create -c xmsec1 (succeeds)
-
-Then
-[root@laptop policy]# xm list
-Name Id Mem(MB) CPU State Time(s) Console
-Domain-0 0 620 0 r---- 42.3 s:00/p:00
-xmnosec 1 9 0 -b--- 0.3 9601 s:00/p:05
-xmsec1 2 9 0 -b--- 0.2 9602 s:01/p:01
-
-Shows a new domain xmsec1 running with primary (here: chinese wall) ssidref 1
-and secondary (here: simple type enforcement) ssidref 1. The ssidrefs are
-independent and can differ for a domain.
-
-[root@laptop policy]# ./policy_tool getpolicy
-
-Policy dump:
-============
-Magic = 1debc.
-PolVer = aaaa0000.
-Len = 112.
-Primary = CHINESE WALL policy (c=1, off=14).
-Secondary = SIMPLE TYPE ENFORCEMENT policy (c=2, off=d8).
-
-
-Chinese Wall policy:
-====================
-Max Types = a.
-Max Ssidrefs = 5.
-Max ConfSets = 2.
-Ssidrefs Off = 10.
-Conflicts Off = 74.
-Runing T. Off = 9c.
-C. Agg. Off = b0.
-
-SSID To CHWALL-Type matrix:
-
- ssidref 0: 01 00 00 00 00 00 00 00 00 00
- ssidref 1: 00 01 00 00 00 00 00 00 00 00
- ssidref 2: 00 00 01 00 00 00 00 00 00 00
- ssidref 3: 00 00 00 01 00 00 00 00 00 00
- ssidref 4: 00 00 00 00 01 00 00 00 00 00
-
-Confict Sets:
-
- c-set 0: 00 00 01 01 00 00 00 00 00 00
- c-set 1: 01 00 00 00 00 01 01 00 00 00 <<< t1 is not part of any c-set
-
-Running
-Types: 01 01 00 00 00 00 00 00 00 00 <<< xmsec1 has ssidref 1->type1
- ^^ <<< ref-count at position 1 incr
-Conflict
-Aggregate Set: 00 00 00 00 00 01 01 00 00 00 <<< domain 1 was allowed to
- <<< start since type 1 was not
- <<< in conflict with running
- <<< types
-
-Simple Type Enforcement policy:
-===============================
-Max Types = 5.
-Max Ssidrefs = 5.
-Ssidrefs Off = 8.
-
-SSID To STE-Type matrix:
-
- ssidref 0: 01 01 01 01 01 <<< the ste policy does not maintain; we
- ssidref 1: 00 01 00 00 00 <-- <<< see that domain xmsec1 has ste
- ssidref 2: 00 00 01 00 00 <<< ssidref1->type1 and has this type in
- ssidref 3: 00 00 00 01 00 <<< common with dom0
- ssidref 4: 00 00 00 00 01
-
-
-Policy dump End.
-
-Look at sHype output in xen dmesg:
-
-[root@laptop xen]# xm dmesg
-.
-.
-[somewhere near the very end]
-(XEN) chwall_init_domain_ssid: determined chwall_ssidref to 1.
-(XEN) ste_init_domain_ssid.
-(XEN) ste_init_domain_ssid: determined ste_ssidref to 1.
-(XEN) acm_init_domain_ssid: Instantiated individual ssid for domain 0x01.
-(XEN) chwall_post_domain_create.
-(XEN) ste_pre_eventchannel_interdomain.
-(XEN) ste_pre_eventchannel_interdomain: (evtchn 0 --> 1) common type #01.
-(XEN) shype_authorize_domops.
-(XEN) ste_pre_eventchannel_interdomain.
-(XEN) ste_pre_eventchannel_interdomain: (evtchn 0 --> 1) common type #01.
-(XEN) ste_pre_eventchannel_interdomain.
-(XEN) ste_pre_eventchannel_interdomain: (evtchn 0 --> 1) common type #01.
-
-
-You can see that the chinese wall policy does not complain and that the ste
-policy makes three access control decisions for three event-channels setup
-between domain 0 and the new domain 1. Each time, the two domains share the
-type1 and setting up the eventchannel is permitted.
-
-
-Starting up a second domain xmsec2:
-
-[root@laptop xen]# xm create -c xmsec2
-Using config file "xmsec2".
-Started domain xmsec2, console on port 9602
-************ REMOTE CONSOLE: CTRL-] TO QUIT ********
-Linux version 2.6.11-xenU (root@xxxxxxxxxxxxxxx) (gcc version 3.4.2 20041017
-(Red Hat 3.4.2-6.fc3)) #1 Wed Mar 30 13:14:31 EST 2005
-.
-.
-.
-[root@laptop policy]# xm list
-Name Id Mem(MB) CPU State Time(s) Console
-Domain-0 0 620 0 r---- 71.7 s:00/p:00
-xmsec1 1 9 0 -b--- 0.3 9601 s:01/p:01
-xmsec2 2 7 0 -b--- 0.3 9602 s:02/p:02 << our domain runs both policies with ssidref 2
-
-
-[root@laptop policy]# ./policy_tool getpolicy
-
-Policy dump:
-============
-Magic = 1debc.
-PolVer = aaaa0000.
-Len = 112.
-Primary = CHINESE WALL policy (c=1, off=14).
-Secondary = SIMPLE TYPE ENFORCEMENT policy (c=2, off=d8).
-
-
-Chinese Wall policy:
-====================
-Max Types = a.
-Max Ssidrefs = 5.
-Max ConfSets = 2.
-Ssidrefs Off = 10.
-Conflicts Off = 74.
-Runing T. Off = 9c.
-C. Agg. Off = b0.
-
-SSID To CHWALL-Type matrix:
-
- ssidref 0: 01 00 00 00 00 00 00 00 00 00
- ssidref 1: 00 01 00 00 00 00 00 00 00 00
- ssidref 2: 00 00 01 00 00 00 00 00 00 00 <<< our domain has type 2 set
- ssidref 3: 00 00 00 01 00 00 00 00 00 00
- ssidref 4: 00 00 00 00 01 00 00 00 00 00
-
-Confict Sets:
-
- c-set 0: 00 00 01 01 00 00 00 00 00 00 <<< t2 is in c-set0 with type 3
- c-set 1: 01 00 00 00 00 01 01 00 00 00
-
-Running
-Types: 01 01 01 00 00 00 00 00 00 00 <<< t2 is running since the
- ^^ <<< current aggregate conflict
- <<< set (see above) does not
- <<< include type 2
-Conflict
-Aggregate Set: 00 00 00 01 00 01 01 00 00 00 <<< type 3 is added to the
- <<< conflict aggregate
-
-
-Simple Type Enforcement policy:
-===============================
-Max Types = 5.
-Max Ssidrefs = 5.
-Ssidrefs Off = 8.
-
-SSID To STE-Type matrix:
-
- ssidref 0: 01 01 01 01 01
- ssidref 1: 00 01 00 00 00
- ssidref 2: 00 00 01 00 00
- ssidref 3: 00 00 00 01 00
- ssidref 4: 00 00 00 00 01
-
-
-Policy dump End.
-
-
-The sHype xen dmesg output looks similar to the one above when starting the
-first domain.
-
-Now we start xmsec3 and it has ssidref3. Thus, it tries to run as type3 which
-conflicts with running type2 (from xmsec2). As expected, creating this domain
-fails for security policy enforcement reasons.
-
-[root@laptop xen]# xm create -c xmsec3
-Using config file "xmsec3".
-Error: Error creating domain: (22, 'Invalid argument')
-[root@laptop xen]#
-
-[root@laptop xen]# xm dmesg
-.
-.
-[somewhere near the very end]
-(XEN) chwall_pre_domain_create.
-(XEN) chwall_pre_domain_create: CHINESE WALL CONFLICT in type 03.
-
-xmsec3 ssidref3 points to type3, which is in the current conflict aggregate
-set. This domain cannot start until domain xmsec2 is destroyed, at which time
-the aggregate conflict set is reduced and type3 is excluded from it. Then,
-xmsec3 can start. Of course, afterwards, xmsec2 cannot be restarted. Try it.
-
-3. Policy tool
-**************
-toos/policy/policy_tool.c
-
-a) ./policy_tool getpolicy
- prints the currently enforced policy
- (see for example section 1.)
-
-b) ./policy_tool setpolicy
- sets a predefined and hardcoded security
- policy (the one described in section 2.)
-
-c) ./policy_tool dumpstats
- prints some status information about the caching
- of access control decisions (number of cache hits
- and number of policy evaluations for grant_table
- and event channels).
-
-d) ./policy_tool loadpolicy <binary_policy_file>
- sets the policy defined in the <binary_policy_file>
- please use the policy_processor that is posted to this
- mailing list to create such a binary policy from an XML
- policy description
-
-4. Policy interface:
-********************
-The Policy interface is working in "network-byte-order" (big endian). The reason for this
-is that policy files/management should be portable and independent of the platforms.
-
-Our policy interface enables managers to create a single binary policy file in a trusted
-environment and distributed it to multiple systems for enforcement.
-
-5. Booting with a binary policy:
-********************************
-The grub configuration file can be adapted to boot the hypervisor with an
-already active policy. To do this, a binary policy file - this can be
-the same file as used by the policy_tool - should be placed into the boot
-partition. The following entry from the grub configuration file shows how
-a binary policy can be added to the system during boot time. Note that the
-binary policy must be of the same type that the hypervisor was compiled
-for. The policy module line should also only be added as the last module
-line if XEN was compiled with the access control module (ACM).
-
-title XEN0 3.0 Devel
- kernel /xen.gz dom0_mem=400000
- module /vmlinuz-2.6.12-xen0 root=/dev/hda2 ro console=tty0
- module /initrd-2.6.12-xen0.img
- module /xen_sample_policy.bin
-
-
-====================end-of file=======================================
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/timers/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/timers/Makefile Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,17 +0,0 @@
-#
-# Makefile for x86 timers
-#
-
-XENARCH := $(subst ",,$(CONFIG_XENARCH))
-
-obj-y := timer_tsc.o
-c-obj-y :=
-
-c-link :=
-
-$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
- @ln -fsn $(srctree)/arch/i386/kernel/timers/$(notdir $@) $@
-
-obj-y += $(c-obj-y)
-
-clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,379 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/cpufreq.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-/* processor.h for distable_tsc flag */
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-
-#include <asm/hpet.h>
-
-#ifdef CONFIG_HPET_TIMER
-static unsigned long hpet_usec_quotient;
-static unsigned long hpet_last;
-static struct timer_opts timer_tsc;
-#endif
-
-static inline void cpufreq_delayed_get(void);
-
-int tsc_disable __initdata = 0;
-
-extern spinlock_t i8253_lock;
-
-static int use_tsc;
-
-static unsigned long long monotonic_base;
-static u32 monotonic_offset;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_mhz * 10^6))
- * ns = cycles * (10^3 / cpu_mhz)
- *
- * Then we use scaling math (suggested by george@xxxxxxxxxx) to get:
- * ns = cycles * (10^3 * SC / cpu_mhz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- * -johnstul@xxxxxxxxxx "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
-{
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-extern u32 shadow_tsc_stamp;
-extern u64 shadow_system_time;
-
-static unsigned long get_offset_tsc(void)
-{
- register unsigned long eax, edx;
-
- /* Read the Time Stamp Counter */
-
- rdtsc(eax,edx);
-
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= shadow_tsc_stamp;
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
-
- /* our adjusted time offset in microseconds */
- return edx;
-}
-
-static unsigned long long monotonic_clock_tsc(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = monotonic_offset;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
- unsigned long long this_offset;
-
- /*
- * In the NUMA case we dont use the TSC as they are not
- * synchronized across all CPUs.
- */
-#ifndef CONFIG_NUMA
- if (!use_tsc)
-#endif
- /* no locking but a rare wrong value is not a big deal */
- return jiffies_64 * (1000000000 / HZ);
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return cycles_2_ns(this_offset);
-}
-
-
-static void mark_offset_tsc(void)
-{
-
- /* update the monotonic base value */
- write_seqlock(&monotonic_lock);
- monotonic_base = shadow_system_time;
- monotonic_offset = shadow_tsc_stamp;
- write_sequnlock(&monotonic_lock);
-}
-
-static void delay_tsc(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-#ifdef CONFIG_HPET_TIMER
-static void mark_offset_tsc_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset, temp, hpet_current;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
- /* read Pentium cycle counter */
-
- hpet_current = hpet_readl(HPET_COUNTER);
- rdtsc(last_tsc_low, last_tsc_high);
-
- /* lost tick compensation */
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
- int lost_ticks = (offset - hpet_last) / hpet_tick;
- jiffies_64 += lost_ticks;
- }
- hpet_last = hpet_current;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- */
- delay_at_last_interrupt = hpet_current - offset;
- ASM_MUL64_REG(temp, delay_at_last_interrupt,
- hpet_usec_quotient, delay_at_last_interrupt);
-}
-#endif
-
-
-#ifdef CONFIG_CPU_FREQ
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(void *v)
-{
- unsigned int cpu;
- for_each_online_cpu(cpu) {
- cpufreq_get(cpu);
- }
- cpufreq_delayed_issched = 0;
-}
-
-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
- * to verify the CPU frequency the timing core thinks the CPU is running
- * at is still correct.
- */
-static inline void cpufreq_delayed_get(void)
-{
- if (cpufreq_init && !cpufreq_delayed_issched) {
- cpufreq_delayed_issched = 1;
- printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
- schedule_work(&cpufreq_delayed_get_work);
- }
-}
-
-/* If the CPU frequency is scaled, TSC-based delays will need a different
- * loops_per_jiffy value to function properly.
- */
-
-static unsigned int ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-#ifndef CONFIG_SMP
-static unsigned long fast_gettimeoffset_ref = 0;
-static unsigned long cpu_khz_ref = 0;
-#endif
-
-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
-
- if (val != CPUFREQ_RESUMECHANGE)
- write_seqlock_irq(&xtime_lock);
- if (!ref_freq) {
- ref_freq = freq->old;
- loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
-#ifndef CONFIG_SMP
- fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
- cpu_khz_ref = cpu_khz;
-#endif
- }
-
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-#ifndef CONFIG_SMP
- if (cpu_khz)
- cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
- if (use_tsc) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
- fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
- set_cyc2ns_scale(cpu_khz/1000);
- }
- }
-#endif
- }
-
- if (val != CPUFREQ_RESUMECHANGE)
- write_sequnlock_irq(&xtime_lock);
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-
-static int __init cpufreq_tsc(void)
-{
- int ret;
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
- ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- if (!ret)
- cpufreq_init = 1;
- return ret;
-}
-core_initcall(cpufreq_tsc);
-
-#else /* CONFIG_CPU_FREQ */
-static inline void cpufreq_delayed_get(void) { return; }
-#endif
-
-
-static int init_tsc(char* override)
-{
- u64 __cpu_khz;
-
- __cpu_khz = HYPERVISOR_shared_info->cpu_freq;
- do_div(__cpu_khz, 1000);
- cpu_khz = (u32)__cpu_khz;
- printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
-
- /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
- (2^32 * 1 / (clocks/us)) */
- {
- unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (fast_gettimeoffset_quotient), "=d" (edx)
- :"r" (cpu_khz),
- "0" (eax), "1" (edx));
- }
-
- set_cyc2ns_scale(cpu_khz/1000);
-
- use_tsc = 1;
-
- return 0;
-}
-
-static int __init tsc_setup(char *str)
-{
- printk(KERN_WARNING "notsc: cannot disable TSC in Xen/Linux.\n");
- return 1;
-}
-__setup("notsc", tsc_setup);
-
-
-
-/************************************************************/
-
-/* tsc timer_opts struct */
-struct timer_opts timer_tsc = {
- .name = "tsc",
- .mark_offset = mark_offset_tsc,
- .get_offset = get_offset_tsc,
- .monotonic_clock = monotonic_clock_tsc,
- .delay = delay_tsc,
-};
-
-struct init_timer_opts timer_tsc_init = {
- .init = init_tsc,
- .opts = &timer_tsc,
-};
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,70 +0,0 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-
-#include <linux/sched.h>
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/suspend.h>
-#include <asm/pda.h>
-#include <asm/processor.h>
-#include <asm/segment.h>
-#include <asm/thread_info.h>
-#include <asm/ia32.h>
-
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-int main(void)
-{
-#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
- ENTRY(state);
- ENTRY(flags);
- ENTRY(thread);
- ENTRY(pid);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
- ENTRY(flags);
- ENTRY(addr_limit);
- ENTRY(preempt_count);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- ENTRY(kernelstack);
- ENTRY(oldrsp);
- ENTRY(pcurrent);
- ENTRY(irqrsp);
- ENTRY(irqcount);
- ENTRY(cpunumber);
- ENTRY(irqstackptr);
- ENTRY(kernel_mode);
- BLANK();
-#undef ENTRY
-#ifdef CONFIG_IA32_EMULATION
-#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
- ENTRY(eax);
- ENTRY(ebx);
- ENTRY(ecx);
- ENTRY(edx);
- ENTRY(esi);
- ENTRY(edi);
- ENTRY(ebp);
- ENTRY(esp);
- ENTRY(eip);
- BLANK();
-#undef ENTRY
- DEFINE(IA32_RT_SIGFRAME_sigcontext,
- offsetof (struct rt_sigframe32, uc.uc_mcontext));
- BLANK();
-#endif
- DEFINE(pbe_address, offsetof(struct pbe, address));
- DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
- DEFINE(pbe_next, offsetof(struct pbe, next));
- return 0;
-}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/init_task.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/init_task.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,49 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial task structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp;
-
-#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,336 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm-xen/balloon.h>
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction)
-{
- int i;
-
- BUG_ON(direction == DMA_NONE);
- for (i = 0; i < nents; i++ ) {
- struct scatterlist *s = &sg[i];
- BUG_ON(!s->page);
- s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
- s->dma_length = s->length;
- }
- return nents;
-}
-
-EXPORT_SYMBOL(dma_map_sg);
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, int dir)
-{
- int i;
- for (i = 0; i < nents; i++) {
- struct scatterlist *s = &sg[i];
- BUG_ON(s->page == NULL);
- BUG_ON(s->dma_address == 0);
- dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
- }
-}
-
-EXPORT_SYMBOL(dma_unmap_sg);
-
-struct dma_coherent_mem {
- void *virt_base;
- u32 device_base;
- int size;
- int flags;
- unsigned long *bitmap;
-};
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, unsigned gfp)
-{
- void *ret;
- unsigned int order = get_order(size);
- unsigned long vstart;
-
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-
- /* ignore region specifiers */
- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
-
- if (mem) {
- int page = bitmap_find_free_region(mem->bitmap, mem->size,
- order);
- if (page >= 0) {
- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
- ret = mem->virt_base + (page << PAGE_SHIFT);
- memset(ret, 0, size);
- return ret;
- }
- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
- return NULL;
- }
-
- if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
- gfp |= GFP_DMA;
-
- vstart = __get_free_pages(gfp, order);
- ret = (void *)vstart;
- if (ret == NULL)
- return ret;
-
- xen_contig_memory(vstart, order);
-
- memset(ret, 0, size);
- *dma_handle = virt_to_bus(ret);
-
- return ret;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
- int order = get_order(size);
-
- if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
- bitmap_release_region(mem->bitmap, page, order);
- } else
- free_pages((unsigned long)vaddr, order);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-#if 0
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
- dma_addr_t device_addr, size_t size, int flags)
-{
- void __iomem *mem_base;
- int pages = size >> PAGE_SHIFT;
- int bitmap_size = (pages + 31)/32;
-
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
- goto out;
- if (!size)
- goto out;
- if (dev->dma_mem)
- goto out;
-
- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
- mem_base = ioremap(bus_addr, size);
- if (!mem_base)
- goto out;
-
- dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
- if (!dev->dma_mem)
- goto out;
- memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
- dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
- if (!dev->dma_mem->bitmap)
- goto free1_out;
- memset(dev->dma_mem->bitmap, 0, bitmap_size);
-
- dev->dma_mem->virt_base = mem_base;
- dev->dma_mem->device_base = device_addr;
- dev->dma_mem->size = pages;
- dev->dma_mem->flags = flags;
-
- if (flags & DMA_MEMORY_MAP)
- return DMA_MEMORY_MAP;
-
- return DMA_MEMORY_IO;
-
- free1_out:
- kfree(dev->dma_mem->bitmap);
- out:
- return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
-
- if(!mem)
- return;
- dev->dma_mem = NULL;
- iounmap(mem->virt_base);
- kfree(mem->bitmap);
- kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- int pos, err;
-
- if (!mem)
- return ERR_PTR(-EINVAL);
-
- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
- if (err != 0)
- return ERR_PTR(err);
- return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-#endif
-
-static LIST_HEAD(dma_map_head);
-static DEFINE_SPINLOCK(dma_map_lock);
-struct dma_map_entry {
- struct list_head list;
- dma_addr_t dma;
- char *bounce, *host;
- size_t size;
-};
-#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d)))
-
-dma_addr_t
-dma_map_single(struct device *dev, void *ptr, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_map_entry *ent;
- void *bnc;
- dma_addr_t dma;
- unsigned long flags;
-
- if (direction == DMA_NONE)
- out_of_line_bug();
-
- /*
- * Even if size is sub-page, the buffer may still straddle a page
- * boundary. Take into account buffer start offset. All other calls are
- * conservative and always search the dma_map list if it's non-empty.
- */
- if (((((unsigned long)ptr) & ~PAGE_MASK) + size) <= PAGE_SIZE) {
- dma = virt_to_bus(ptr);
- } else {
- BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, 0)) == NULL);
- BUG_ON((ent = kmalloc(sizeof(*ent), GFP_KERNEL)) == NULL);
- if (direction != DMA_FROM_DEVICE)
- memcpy(bnc, ptr, size);
- ent->dma = dma;
- ent->bounce = bnc;
- ent->host = ptr;
- ent->size = size;
- spin_lock_irqsave(&dma_map_lock, flags);
- list_add(&ent->list, &dma_map_head);
- spin_unlock_irqrestore(&dma_map_lock, flags);
- }
-
- if ((dma+size) & ~*dev->dma_mask)
- out_of_line_bug();
- return dma;
-}
-EXPORT_SYMBOL(dma_map_single);
-
-void
-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_map_entry *ent;
- unsigned long flags;
-
- if (direction == DMA_NONE)
- out_of_line_bug();
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list ) {
- if (DMA_MAP_MATCHES(ent, dma_addr)) {
- list_del(&ent->list);
- break;
- }
- }
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- BUG_ON(dma_addr != ent->dma);
- BUG_ON(size != ent->size);
- if (direction != DMA_TO_DEVICE)
- memcpy(ent->host, ent->bounce, size);
- dma_free_coherent(dev, size, ent->bounce, ent->dma);
- kfree(ent);
- }
- }
-}
-EXPORT_SYMBOL(dma_unmap_single);
-
-void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_map_entry *ent;
- unsigned long flags, off;
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list )
- if (DMA_MAP_MATCHES(ent, dma_handle))
- break;
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- off = dma_handle - ent->dma;
- BUG_ON((off + size) > ent->size);
- /*if (direction != DMA_TO_DEVICE)*/
- memcpy(ent->host+off, ent->bounce+off, size);
- }
- }
-}
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
-
-void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_map_entry *ent;
- unsigned long flags, off;
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list )
- if (DMA_MAP_MATCHES(ent, dma_handle))
- break;
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- off = dma_handle - ent->dma;
- BUG_ON((off + size) > ent->size);
- /*if (direction != DMA_FROM_DEVICE)*/
- memcpy(ent->bounce+off, ent->host+off, size);
- }
- }
-
- flush_write_buffers();
-}
-EXPORT_SYMBOL(dma_sync_single_for_device);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,466 +0,0 @@
-/*
- * arch/x86_64/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm/fixmap.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-
-/*
- * Reuse arch/xen/i396/mm/ioremap.c. Need to merge later
- */
-#ifndef CONFIG_XEN_PHYSDEV_ACCESS
-
-void * __ioremap(unsigned long phys_addr, unsigned long size,
- unsigned long flags)
-{
- return NULL;
-}
-
-void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
- return NULL;
-}
-
-void iounmap(volatile void __iomem *addr)
-{
-}
-
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
- return NULL;
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
-}
-
-#else
-
-#if defined(__i386__)
-/*
- * Does @address reside within a non-highmem page that is local to this virtual
- * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
- * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand
- * why this works.
- */
-static inline int is_local_lowmem(unsigned long address)
-{
- extern unsigned long max_low_pfn;
- unsigned long mfn = address >> PAGE_SHIFT;
- unsigned long pfn = mfn_to_pfn(mfn);
- return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
-}
-#elif defined(__x86_64__)
-/*
- *
- */
-static inline int is_local_lowmem(unsigned long address)
-{
- return 0;
-}
-#endif
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
-{
- void __iomem * addr;
- struct vm_struct * area;
- unsigned long offset, last_addr;
- domid_t domid = DOMID_IO;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (phys_addr >= 0x0 && last_addr < 0x100000)
- return isa_bus_to_virt(phys_addr);
-#endif
-
- /*
- * Don't allow anybody to remap normal RAM that we're using..
- */
- if (is_local_lowmem(phys_addr)) {
- char *t_addr, *t_end;
- struct page *page;
-
- t_addr = bus_to_virt(phys_addr);
- t_end = t_addr + (size - 1);
-
- for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
- if(!PageReserved(page))
- return NULL;
-
- domid = DOMID_LOCAL;
- }
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP | (flags << 20));
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- addr = (void __iomem *) area->addr;
- if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
- size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
- _PAGE_DIRTY | _PAGE_ACCESSED
-#if defined(__x86_64__)
- | _PAGE_USER
-#endif
- | flags), domid)) {
- vunmap((void __force *) addr);
- return NULL;
- }
- return (void __iomem *) (offset + (char __iomem *)addr);
-}
-
-
-/**
- * ioremap_nocache - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- *
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
- unsigned long last_addr;
- void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
- if (!p)
- return p;
-
- /* Guaranteed to be > phys_addr, as per __ioremap() */
- last_addr = phys_addr + size - 1;
-
- if (is_local_lowmem(last_addr)) {
- struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
- unsigned long npages;
-
- phys_addr &= PAGE_MASK;
-
- /* This might overflow and become zero.. */
- last_addr = PAGE_ALIGN(last_addr);
-
- /* .. but that's ok, because modulo-2**n arithmetic will make
- * the page-aligned "last - first" come out right.
- */
- npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
- if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
- iounmap(p);
- p = NULL;
- }
- global_flush_tlb();
- }
-
- return p;
-}
-
-void iounmap(volatile void __iomem *addr)
-{
- struct vm_struct *p;
- if ((void __force *) addr <= high_memory)
- return;
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
- return;
-#endif
- p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
- if (!p) {
- printk("__iounmap: bad address %p\n", addr);
- return;
- }
-
- if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
- /* p->size includes the guard page, but cpa doesn't like that */
- change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
- (p->size - PAGE_SIZE) >> PAGE_SHIFT,
- PAGE_KERNEL);
- global_flush_tlb();
- }
- kfree(p);
-}
-
-#if defined(__i386__)
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
- unsigned long offset, last_addr;
- unsigned int nrpages;
- enum fixed_addresses idx;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (phys_addr >= 0x0 && last_addr < 0x100000)
- return isa_bus_to_virt(phys_addr);
-#endif
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr) - phys_addr;
-
- /*
- * Mappings have to fit in the FIX_BTMAP area.
- */
- nrpages = size >> PAGE_SHIFT;
- if (nrpages > NR_FIX_BTMAPS)
- return NULL;
-
- /*
- * Ok, go for it..
- */
- idx = FIX_BTMAP_BEGIN;
- while (nrpages > 0) {
- set_fixmap(idx, phys_addr);
- phys_addr += PAGE_SIZE;
- --idx;
- --nrpages;
- }
- return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
- unsigned long virt_addr;
- unsigned long offset;
- unsigned int nrpages;
- enum fixed_addresses idx;
-
- virt_addr = (unsigned long)addr;
- if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
- return;
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
- return;
-#endif
- offset = virt_addr & ~PAGE_MASK;
- nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
-
- idx = FIX_BTMAP_BEGIN;
- while (nrpages > 0) {
- clear_fixmap(idx);
- --idx;
- --nrpages;
- }
-}
-#endif /* defined(__i386__) */
-
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
-
-/* These hacky macros avoid phys->machine translations. */
-#define __direct_pte(x) ((pte_t) { (x) } )
-#define __direct_mk_pte(page_nr,pgprot) \
- __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
-#define direct_mk_pte_phys(physpage, pgprot) \
- __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-
-static inline void direct_remap_area_pte(pte_t *pte,
- unsigned long address,
- unsigned long size,
- mmu_update_t **v)
-{
- unsigned long end;
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- if (address >= end)
- BUG();
-
- do {
- (*v)->ptr = virt_to_machine(pte);
- (*v)++;
- address += PAGE_SIZE;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int direct_remap_area_pmd(struct mm_struct *mm,
- pmd_t *pmd,
- unsigned long address,
- unsigned long size,
- mmu_update_t **v)
-{
- unsigned long end;
-
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- if (address >= end)
- BUG();
- do {
- pte_t *pte = (mm == &init_mm) ?
- pte_alloc_kernel(mm, pmd, address) :
- pte_alloc_map(mm, pmd, address);
- if (!pte)
- return -ENOMEM;
- direct_remap_area_pte(pte, address, end - address, v);
- pte_unmap(pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
- return 0;
-}
-
-int __direct_remap_area_pages(struct mm_struct *mm,
- unsigned long address,
- unsigned long size,
- mmu_update_t *v)
-{
- pgd_t * dir;
- unsigned long end = address + size;
- int error;
-
-#if defined(__i386__)
- dir = pgd_offset(mm, address);
-#elif defined (__x86_64)
- dir = (mm == &init_mm) ?
- pgd_offset_k(address):
- pgd_offset(mm, address);
-#endif
- if (address >= end)
- BUG();
- spin_lock(&mm->page_table_lock);
- do {
- pud_t *pud;
- pmd_t *pmd;
-
- error = -ENOMEM;
- pud = pud_alloc(mm, dir, address);
- if (!pud)
- break;
- pmd = pmd_alloc(mm, pud, address);
- if (!pmd)
- break;
- error = 0;
- direct_remap_area_pmd(mm, pmd, address, end - address, &v);
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
-
- } while (address && (address < end));
- spin_unlock(&mm->page_table_lock);
- return error;
-}
-
-
-int direct_remap_area_pages(struct mm_struct *mm,
- unsigned long address,
- unsigned long machine_addr,
- unsigned long size,
- pgprot_t prot,
- domid_t domid)
-{
- int i;
- unsigned long start_address;
-#define MAX_DIRECTMAP_MMU_QUEUE 130
- mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
-
- start_address = address;
-
- flush_cache_all();
-
- for (i = 0; i < size; i += PAGE_SIZE) {
- if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
- /* Fill in the PTE pointers. */
- __direct_remap_area_pages(mm,
- start_address,
- address-start_address,
- u);
-
- if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
- return -EFAULT;
- v = u;
- start_address = address;
- }
-
- /*
- * Fill in the machine address: PTE ptr is done later by
- * __direct_remap_area_pages().
- */
- v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
-
- machine_addr += PAGE_SIZE;
- address += PAGE_SIZE;
- v++;
- }
-
- if (v != u) {
- /* get the ptep's filled in */
- __direct_remap_area_pages(mm,
- start_address,
- address-start_address,
- u);
- if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
- return -EFAULT;
- }
-
- flush_tlb_all();
-
- return 0;
-}
-
-EXPORT_SYMBOL(direct_remap_area_pages);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkback/control.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/control.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,61 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/blkif/backend/control.c
- *
- * Routines for interfacing with the control plane.
- *
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype);
-
- switch ( msg->subtype )
- {
- case CMSG_BLKIF_BE_CREATE:
- blkif_create((blkif_be_create_t *)&msg->msg[0]);
- break;
- case CMSG_BLKIF_BE_DESTROY:
- blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
- break;
- case CMSG_BLKIF_BE_CONNECT:
- blkif_connect((blkif_be_connect_t *)&msg->msg[0]);
- break;
- case CMSG_BLKIF_BE_DISCONNECT:
- if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) )
- return; /* Sending the response is deferred until later. */
- break;
- case CMSG_BLKIF_BE_VBD_CREATE:
- vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]);
- break;
- case CMSG_BLKIF_BE_VBD_DESTROY:
- vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]);
- break;
- default:
- DPRINTK("Parse error while reading message subtype %d, len %d\n",
- msg->subtype, msg->length);
- msg->length = 0;
- break;
- }
-
- ctrl_if_send_response(msg);
-}
-
-void blkif_ctrlif_init(void)
-{
- ctrl_msg_t cmsg;
- blkif_be_driver_status_t st;
-
- (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_BLKIF_BE;
- cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS;
- cmsg.length = sizeof(blkif_be_driver_status_t);
- st.status = BLKIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netback/control.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/control.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,58 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/netif/backend/control.c
- *
- * Routines for interfacing with the control plane.
- *
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- DPRINTK("Received netif backend message, subtype=%d\n", msg->subtype);
-
- switch ( msg->subtype )
- {
- case CMSG_NETIF_BE_CREATE:
- netif_create((netif_be_create_t *)&msg->msg[0]);
- break;
- case CMSG_NETIF_BE_DESTROY:
- netif_destroy((netif_be_destroy_t *)&msg->msg[0]);
- break;
- case CMSG_NETIF_BE_CREDITLIMIT:
- netif_creditlimit((netif_be_creditlimit_t *)&msg->msg[0]);
- break;
- case CMSG_NETIF_BE_CONNECT:
- netif_connect((netif_be_connect_t *)&msg->msg[0]);
- break;
- case CMSG_NETIF_BE_DISCONNECT:
- if ( !netif_disconnect((netif_be_disconnect_t *)&msg->msg[0],msg->id) )
- return; /* Sending the response is deferred until later. */
- break;
- default:
- DPRINTK("Parse error while reading message subtype %d, len %d\n",
- msg->subtype, msg->length);
- msg->length = 0;
- break;
- }
-
- ctrl_if_send_response(msg);
-}
-
-void netif_ctrlif_init(void)
-{
- ctrl_msg_t cmsg;
- netif_be_driver_status_t st;
-
- (void)ctrl_if_register_receiver(CMSG_NETIF_BE, netif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_NETIF_BE;
- cmsg.subtype = CMSG_NETIF_BE_DRIVER_STATUS;
- cmsg.length = sizeof(netif_be_driver_status_t);
- st.status = NETIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pda.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pda.h Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,85 +0,0 @@
-#ifndef X86_64_PDA_H
-#define X86_64_PDA_H
-
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/cache.h>
-
-/* Per processor datastructure. %gs points to it while the kernel runs */
-struct x8664_pda {
- struct task_struct *pcurrent; /* Current process */
- unsigned long data_offset; /* Per cpu data offset from linker address */
- struct x8664_pda *me; /* Pointer to itself */
- unsigned long kernelstack; /* top of kernel stack for current */
- unsigned long oldrsp; /* user rsp for system call */
- unsigned long irqrsp; /* Old rsp for interrupts. */
- int irqcount; /* Irq nesting counter. Starts with -1 */
- int cpunumber; /* Logical CPU number */
- char *irqstackptr; /* top of irqstack */
- unsigned int __softirq_pending;
- unsigned int __nmi_count; /* number of NMI on this CPUs */
- unsigned long idle_timestamp;
- struct mm_struct *active_mm;
- int mmu_state;
- unsigned apic_timer_irqs;
- int kernel_mode; /* kernel or user mode */
-} ____cacheline_aligned;
-
-
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
-
-extern struct x8664_pda cpu_pda[];
-
-/*
- * There is no fast way to get the base address of the PDA, all the accesses
- * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
- */
-#define sizeof_field(type,field) (sizeof(((type *)0)->field))
-#define typeof_field(type,field) typeof(((type *)0)->field)
-
-extern void __bad_pda_field(void);
-
-#define pda_offset(field) offsetof(struct x8664_pda, field)
-
-#define pda_to_op(op,field,val) do { \
- switch (sizeof_field(struct x8664_pda, field)) { \
-case 2: \
-asm volatile(op "w %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
-case 4: \
-asm volatile(op "l %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
-case 8: \
-asm volatile(op "q %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
- default: __bad_pda_field(); \
- } \
- } while (0)
-
-/*
- * AK: PDA read accesses should be neither volatile nor have an memory clobber.
- * Unfortunately removing them causes all hell to break lose currently.
- */
-#define pda_from_op(op,field) ({ \
- typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \
- switch (sizeof_field(struct x8664_pda, field)) { \
-case 2: \
-asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
-case 4: \
-asm volatile(op "l %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
-case 8: \
-asm volatile(op "q %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
- default: __bad_pda_field(); \
- } \
- ret__; })
-
-
-#define read_pda(field) pda_from_op("mov",field)
-#define write_pda(field,val) pda_to_op("mov",field,val)
-#define add_pda(field,val) pda_to_op("add",field,val)
-#define sub_pda(field,val) pda_to_op("sub",field,val)
-
-#endif
-
-#define PDA_STACKOFFSET (5*8)
-
-#endif
diff -r 5f1ed597f107 -r 8799d14bef77 patches/linux-2.6.12/x86_64-linux.patch
--- a/patches/linux-2.6.12/x86_64-linux.patch Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,68 +0,0 @@
-diff -urN linux-2.6.10-orig/include/asm-x86_64/hw_irq.h
linux-2.6.10/include/asm-x86_64/hw_irq.h
---- linux-2.6.10-orig/include/asm-x86_64/hw_irq.h 2005-01-06
00:34:38.000000000 -0500
-+++ linux-2.6.10/include/asm-x86_64/hw_irq.h 2005-02-25 17:45:37.181518088
-0500
-@@ -48,6 +48,7 @@
- *
- * Vectors 0xf0-0xf9 are free (reserved for future Linux use).
- */
-+#ifndef CONFIG_XEN
- #define SPURIOUS_APIC_VECTOR 0xff
- #define ERROR_APIC_VECTOR 0xfe
- #define INVALIDATE_TLB_VECTOR 0xfd
-@@ -57,7 +58,7 @@
- #define KDB_VECTOR 0xf9
-
- #define THERMAL_APIC_VECTOR 0xf0
--
-+#endif
-
- /*
- * Local APIC timer IRQ vector is on a different priority level,
-diff -urN linux-2.6.10-orig/include/asm-x86_64/irq.h
linux-2.6.10/include/asm-x86_64/irq.h
---- linux-2.6.10-orig/include/asm-x86_64/irq.h 2005-01-06 00:34:38.000000000
-0500
-+++ linux-2.6.10/include/asm-x86_64/irq.h 2005-02-25 17:45:37.181518088
-0500
-@@ -10,6 +10,9 @@
- * <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx>
- */
-
-+#ifdef CONFIG_XEN
-+#include "irq_vectors.h"
-+#endif
- #define TIMER_IRQ 0
-
- /*
-@@ -22,6 +25,7 @@
- * the usable vector space is 0x20-0xff (224 vectors)
- */
-
-+#ifndef CONFIG_XEN
- /*
- * The maximum number of vectors supported by x86_64 processors
- * is limited to 256. For processors other than x86_64, NR_VECTORS
-@@ -38,6 +42,7 @@
- #define NR_IRQS 224
- #define NR_IRQ_VECTORS 1024
- #endif
-+#endif
-
- static __inline__ int irq_canonicalize(int irq)
- {
-diff -urN linux-2.6.10-orig/include/asm-x86_64/posix_types.h
linux-2.6.10/include/asm-x86_64/posix_types.h
---- linux-2.6.10-orig/include/asm-x86_64/posix_types.h 2004-10-18
17:55:29.000000000 -0400
-+++ linux-2.6.10/include/asm-x86_64/posix_types.h 2005-02-25
17:45:37.183517784 -0500
-@@ -6,7 +6,7 @@
- * be a little careful about namespace pollution etc. Also, we cannot
- * assume GCC is being used.
- */
--
-+#ifndef __ASSEMBLY__
- typedef unsigned long __kernel_ino_t;
- typedef unsigned int __kernel_mode_t;
- typedef unsigned long __kernel_nlink_t;
-@@ -115,5 +115,5 @@
- }
-
- #endif /* defined(__KERNEL__) */
--
-+#endif
- #endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/Makefile
--- a/tools/consoled/Makefile Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,48 +0,0 @@
-# Makefile for consoled
-# based on xcs Makefile
-# Anthony Liguori 2005
-
-XEN_ROOT=../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-CONSOLED_INSTALL_DIR = /usr/sbin
-XC_CONSOLE_INSTALL_DIR = /usr/libexec/xen
-
-INSTALL = install
-INSTALL_PROG = $(INSTALL) -m0755
-INSTALL_DIR = $(INSTALL) -d -m0755
-
-CC = gcc
-CFLAGS = -Wall -Werror -g3
-
-CFLAGS += -I $(XEN_XCS)
-CFLAGS += -I $(XEN_LIBXC)
-CFLAGS += -I $(XEN_XENSTORE)
-
-SRCS :=
-SRCS += main.c utils.c io.c
-
-HDRS = $(wildcard *.h)
-OBJS = $(patsubst %.c,%.o,$(SRCS))
-BIN = consoled xc_console
-
-all: $(BIN)
-
-clean:
- $(RM) *.a *.so *.o *.rpm $(BIN)
-
-consoled: $(OBJS)
- $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \
- -lxc -lxenstore
-
-xc_console: xc_console.o
- $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \
- -lxc -lxenstore
-
-$(OBJS): $(HDRS)
-
-install: $(BIN)
- $(INSTALL_DIR) -p $(DESTDIR)/$(CONSOLED_INSTALL_DIR)
- $(INSTALL_PROG) consoled $(DESTDIR)/$(CONSOLED_INSTALL_DIR)
- $(INSTALL_DIR) -p $(DESTDIR)/$(XC_CONSOLE_INSTALL_DIR)
- $(INSTALL_PROG) xc_console $(DESTDIR)/$(XC_CONSOLE_INSTALL_DIR)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/io.c
--- a/tools/consoled/io.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,328 +0,0 @@
-/*\
- * Copyright (C) International Business Machines Corp., 2005
- * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- * Xen Console Daemon
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; under version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-\*/
-
-#define _GNU_SOURCE
-
-#include "utils.h"
-#include "io.h"
-
-#include "xc.h"
-#include "xs.h"
-#include "xen/io/domain_controller.h"
-#include "xcs_proto.h"
-
-#include <malloc.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/select.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <termios.h>
-
-#define MAX(a, b) (((a) > (b)) ? (a) : (b))
-#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-
-struct buffer
-{
- char *data;
- size_t size;
- size_t capacity;
- size_t max_capacity;
-};
-
-void buffer_append(struct buffer *buffer, const void *data, size_t size)
-{
- if ((buffer->capacity - buffer->size) < size) {
- buffer->capacity += (size + 1024);
- buffer->data = realloc(buffer->data, buffer->capacity);
- if (buffer->data == NULL) {
- dolog(LOG_ERR, "Memory allocation failed");
- exit(ENOMEM);
- }
- }
-
- memcpy(buffer->data + buffer->size, data, size);
- buffer->size += size;
-
- if (buffer->max_capacity &&
- buffer->size > buffer->max_capacity) {
- memmove(buffer->data + (buffer->size - buffer->max_capacity),
- buffer->data, buffer->max_capacity);
- buffer->data = realloc(buffer->data, buffer->max_capacity);
- buffer->capacity = buffer->max_capacity;
- }
-}
-
-bool buffer_empty(struct buffer *buffer)
-{
- return buffer->size == 0;
-}
-
-void buffer_advance(struct buffer *buffer, size_t size)
-{
- size = MIN(size, buffer->size);
- memmove(buffer->data, buffer + size, buffer->size - size);
- buffer->size -= size;
-}
-
-struct domain
-{
- int domid;
- int tty_fd;
- struct buffer buffer;
- struct domain *next;
-};
-
-static struct domain *dom_head;
-
-bool domain_is_valid(int domid)
-{
- bool ret;
- xc_dominfo_t info;
-
- ret = (xc_domain_getinfo(xc, domid, 1, &info) == 1 &&
- info.domid == domid);
-
- return ret;
-}
-
-int domain_create_tty(int domid)
-{
- char path[1024];
- int master;
-
- if ((master = getpt()) == -1 ||
- grantpt(master) == -1 || unlockpt(master) == -1) {
- dolog(LOG_ERR, "Failed to create tty for domain-%d", domid);
- master = -1;
- } else {
- const char *slave = ptsname(master);
- struct termios term;
-
- if (tcgetattr(master, &term) != -1) {
- cfmakeraw(&term);
- tcsetattr(master, TCSAFLUSH, &term);
- }
-
- xs_mkdir(xs, "/console");
- snprintf(path, sizeof(path), "/console/%d", domid);
- xs_mkdir(xs, path);
- strcat(path, "/tty");
-
- xs_write(xs, path, slave, strlen(slave), O_CREAT);
- }
-
- return master;
-}
-
-struct domain *create_domain(int domid)
-{
- struct domain *dom;
- char *data;
- unsigned int len;
- char path[1024];
-
- dom = (struct domain *)malloc(sizeof(struct domain));
- if (dom == NULL) {
- dolog(LOG_ERR, "Out of memory %s:%s():L%d",
- __FILE__, __FUNCTION__, __LINE__);
- exit(ENOMEM);
- }
-
- dom->domid = domid;
- dom->tty_fd = domain_create_tty(domid);
- dom->buffer.data = 0;
- dom->buffer.size = 0;
- dom->buffer.capacity = 0;
- dom->buffer.max_capacity = 0;
-
- snprintf(path, sizeof(path), "/console/%d/limit", domid);
- data = xs_read(xs, path, &len);
- if (data) {
- dom->buffer.max_capacity = strtoul(data, 0, 0);
- free(data);
- }
-
- dolog(LOG_DEBUG, "New domain %d", domid);
-
- return dom;
-}
-
-struct domain *lookup_domain(int domid)
-{
- struct domain **pp;
-
- for (pp = &dom_head; *pp; pp = &(*pp)->next) {
- struct domain *dom = *pp;
-
- if (dom->domid == domid) {
- return dom;
- } else if (dom->domid > domid) {
- *pp = create_domain(domid);
- (*pp)->next = dom;
- return *pp;
- }
- }
-
- *pp = create_domain(domid);
- return *pp;
-}
-
-void remove_domain(struct domain *dom)
-{
- struct domain **pp;
-
- dolog(LOG_DEBUG, "Removing domain-%d", dom->domid);
-
- for (pp = &dom_head; *pp; pp = &(*pp)->next) {
- struct domain *d = *pp;
-
- if (dom->domid == d->domid) {
- *pp = d->next;
- free(d);
- break;
- }
- }
-}
-
-void handle_tty_read(struct domain *dom)
-{
- ssize_t len;
- xcs_msg_t msg;
-
- msg.type = XCS_REQUEST;
- msg.u.control.remote_dom = dom->domid;
- msg.u.control.msg.type = CMSG_CONSOLE;
- msg.u.control.msg.subtype = CMSG_CONSOLE_DATA;
- msg.u.control.msg.id = 1;
-
- len = read(dom->tty_fd, msg.u.control.msg.msg, 60);
- if (len < 1) {
- close(dom->tty_fd);
-
- if (domain_is_valid(dom->domid)) {
- dom->tty_fd = domain_create_tty(dom->domid);
- } else {
- remove_domain(dom);
- }
- } else if (domain_is_valid(dom->domid)) {
- msg.u.control.msg.length = len;
-
- if (!write_sync(xcs_data_fd, &msg, sizeof(msg))) {
- dolog(LOG_ERR, "Write to xcs failed: %m");
- }
- } else {
- close(dom->tty_fd);
- remove_domain(dom);
- }
-}
-
-void handle_tty_write(struct domain *dom)
-{
- ssize_t len;
-
- len = write(dom->tty_fd, dom->buffer.data, dom->buffer.size);
- if (len < 1) {
- close(dom->tty_fd);
-
- if (domain_is_valid(dom->domid)) {
- dom->tty_fd = domain_create_tty(dom->domid);
- } else {
- remove_domain(dom);
- }
- } else {
- buffer_advance(&dom->buffer, len);
- }
-}
-
-void handle_xcs_msg(int fd)
-{
- xcs_msg_t msg;
-
- if (!read_sync(fd, &msg, sizeof(msg))) {
- dolog(LOG_ERR, "read from xcs failed! %m");
- } else if (msg.type == XCS_REQUEST) {
- struct domain *dom;
-
- dom = lookup_domain(msg.u.control.remote_dom);
- buffer_append(&dom->buffer,
- msg.u.control.msg.msg,
- msg.u.control.msg.length);
- }
-}
-
-static void enum_domains(void)
-{
- int domid = 0;
- xc_dominfo_t dominfo;
-
- while (xc_domain_getinfo(xc, domid, 1, &dominfo) == 1) {
- lookup_domain(dominfo.domid);
- domid = dominfo.domid + 1;
- }
-}
-
-void handle_io(void)
-{
- fd_set readfds, writefds;
- int ret;
- int max_fd = -1;
-
- do {
- struct domain *d;
- struct timeval tv = { 1, 0 };
-
- FD_ZERO(&readfds);
- FD_ZERO(&writefds);
-
- FD_SET(xcs_data_fd, &readfds);
- max_fd = MAX(xcs_data_fd, max_fd);
-
- for (d = dom_head; d; d = d->next) {
- if (d->tty_fd != -1) {
- FD_SET(d->tty_fd, &readfds);
- }
-
- if (d->tty_fd != -1 && !buffer_empty(&d->buffer)) {
- FD_SET(d->tty_fd, &writefds);
- }
-
- max_fd = MAX(d->tty_fd, max_fd);
- }
-
- ret = select(max_fd + 1, &readfds, &writefds, 0, &tv);
- enum_domains();
-
- if (FD_ISSET(xcs_data_fd, &readfds)) {
- handle_xcs_msg(xcs_data_fd);
- }
-
- for (d = dom_head; d; d = d->next) {
- if (FD_ISSET(d->tty_fd, &readfds)) {
- handle_tty_read(d);
- }
-
- if (FD_ISSET(d->tty_fd, &writefds)) {
- handle_tty_write(d);
- }
- }
- } while (ret > -1);
-}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/io.h
--- a/tools/consoled/io.h Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,26 +0,0 @@
-/*\
- * Copyright (C) International Business Machines Corp., 2005
- * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- * Xen Console Daemon
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; under version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-\*/
-
-#ifndef CONSOLED_IO_H
-#define CONSOLED_IO_H
-
-void handle_io(void);
-
-#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/main.c
--- a/tools/consoled/main.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,93 +0,0 @@
-/*\
- * Copyright (C) International Business Machines Corp., 2005
- * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- * Xen Console Daemon
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; under version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-\*/
-
-#include <getopt.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sys/types.h>
-
-#include "xc.h"
-#include "xen/io/domain_controller.h"
-#include "xcs_proto.h"
-
-#include "utils.h"
-#include "io.h"
-
-int main(int argc, char **argv)
-{
- const char *sopts = "hVvi";
- struct option lopts[] = {
- { "help", 0, 0, 'h' },
- { "version", 0, 0, 'V' },
- { "verbose", 0, 0, 'v' },
- { "interactive", 0, 0, 'i' },
- { 0 },
- };
- bool is_interactive = false;
- int ch;
- int syslog_option = LOG_CONS;
- int syslog_mask = LOG_WARNING;
- int opt_ind = 0;
-
- while ((ch = getopt_long(argc, argv, sopts, lopts, &opt_ind)) != -1) {
- switch (ch) {
- case 'h':
- //usage(argv[0]);
- exit(0);
- case 'V':
- //version(argv[0]);
- exit(0);
- case 'v':
- syslog_option |= LOG_PERROR;
- syslog_mask = LOG_DEBUG;
- break;
- case 'i':
- is_interactive = true;
- break;
- case '?':
- fprintf(stderr,
- "Try `%s --help' for more information\n",
- argv[0]);
- exit(EINVAL);
- }
- }
-
- if (geteuid() != 0) {
- fprintf(stderr, "%s requires root to run.\n", argv[0]);
- exit(EPERM);
- }
-
- openlog("consoled", syslog_option, LOG_DAEMON);
- setlogmask(syslog_mask);
-
- if (!is_interactive) {
- daemonize("/var/run/consoled.pid");
- }
-
- xen_setup();
-
- handle_io();
-
- closelog();
-
- return 0;
-}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/utils.c
--- a/tools/consoled/utils.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,251 +0,0 @@
-/*\
- * Copyright (C) International Business Machines Corp., 2005
- * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- * Xen Console Daemon
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; under version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-\*/
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <err.h>
-#include <errno.h>
-#include <stdio.h>
-#include <getopt.h>
-#include <stdbool.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <string.h>
-
-#include "xc.h"
-#include "xen/io/domain_controller.h"
-#include "xcs_proto.h"
-
-#include "utils.h"
-
-struct xs_handle *xs;
-int xc;
-
-int xcs_ctrl_fd = -1;
-int xcs_data_fd = -1;
-
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
-{
- size_t offset = 0;
- ssize_t len;
-
- while (offset < size) {
- if (do_read) {
- len = read(fd, data + offset, size - offset);
- } else {
- len = write(fd, data + offset, size - offset);
- }
-
- if (len < 1) {
- if (len == -1 && (errno == EAGAIN || errno == EINTR)) {
- return false;
- }
- } else {
- offset += len;
- }
- }
-
- return true;
-}
-
-static int open_domain_socket(const char *path)
-{
- struct sockaddr_un addr;
- int sock;
- size_t addr_len;
-
- if ((sock = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) {
- goto out;
- }
-
- addr.sun_family = AF_UNIX;
- strcpy(addr.sun_path, path);
- addr_len = sizeof(addr.sun_family) + strlen(XCS_SUN_PATH) + 1;
-
- if (connect(sock, (struct sockaddr *)&addr, addr_len) == -1) {
- goto out_close_sock;
- }
-
- return sock;
-
- out_close_sock:
- close(sock);
- out:
- return -1;
-}
-
-static void child_exit(int sig)
-{
- while (waitpid(-1, NULL, WNOHANG) > 0);
-}
-
-void daemonize(const char *pidfile)
-{
- pid_t pid;
- int fd;
- int len;
- int i;
- char buf[100];
-
- if (getppid() == 1) {
- return;
- }
-
- if ((pid = fork()) > 0) {
- exit(0);
- } else if (pid == -1) {
- err(errno, "fork() failed");
- }
-
- setsid();
-
- /* redirect fd 0,1,2 to /dev/null */
- if ((fd = open("/dev/null",O_RDWR)) == -1) {
- exit(1);
- }
-
- for (i = 0; i <= 2; i++) {
- close(i);
- dup2(fd, i);
- }
-
- close(fd);
-
- umask(027);
- chdir("/");
-
- fd = open(pidfile, O_RDWR | O_CREAT);
- if (fd == -1) {
- exit(1);
- }
-
- if (lockf(fd, F_TLOCK, 0) == -1) {
- exit(1);
- }
-
- len = sprintf(buf, "%d\n", getpid());
- write(fd, buf, len);
-
- signal(SIGCHLD, child_exit);
- signal(SIGTSTP, SIG_IGN);
- signal(SIGTTOU, SIG_IGN);
- signal(SIGTTIN, SIG_IGN);
-}
-
-/* synchronized send/recv strictly for setting up xcs */
-/* always use asychronize callbacks any other time */
-static bool xcs_send_recv(int fd, xcs_msg_t *msg)
-{
- bool ret = false;
-
- if (!write_sync(fd, msg, sizeof(*msg))) {
- dolog(LOG_ERR, "Write failed at %s:%s():L%d? Possible bug.",
- __FILE__, __FUNCTION__, __LINE__);
- goto out;
- }
-
- if (!read_sync(fd, msg, sizeof(*msg))) {
- dolog(LOG_ERR, "Read failed at %s:%s():L%d? Possible bug.",
- __FILE__, __FUNCTION__, __LINE__);
- goto out;
- }
-
- ret = true;
-
- out:
- return ret;
-}
-
-bool xen_setup(void)
-{
- int sock;
- xcs_msg_t msg;
-
- xs = xs_daemon_open();
- if (xs == NULL) {
- dolog(LOG_ERR,
- "Failed to contact xenstore (%m). Is it running?");
- goto out;
- }
-
- xc = xc_interface_open();
- if (xc == -1) {
- dolog(LOG_ERR, "Failed to contact hypervisor (%m)");
- goto out;
- }
-
- sock = open_domain_socket(XCS_SUN_PATH);
- if (sock == -1) {
- dolog(LOG_ERR, "Failed to contact xcs (%m). Is it running?");
- goto out_close_store;
- }
-
- xcs_ctrl_fd = sock;
-
- sock = open_domain_socket(XCS_SUN_PATH);
- if (sock == -1) {
- dolog(LOG_ERR, "Failed to contact xcs (%m). Is it running?");
- goto out_close_ctrl;
- }
-
- xcs_data_fd = sock;
-
- memset(&msg, 0, sizeof(msg));
- msg.type = XCS_CONNECT_CTRL;
- if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
- dolog(LOG_ERR, "xcs control connect failed. Possible bug.");
- goto out_close_data;
- }
-
- msg.type = XCS_CONNECT_DATA;
- if (!xcs_send_recv(xcs_data_fd, &msg) || msg.result != XCS_RSLT_OK) {
- dolog(LOG_ERR, "xcs data connect failed. Possible bug.");
- goto out_close_data;
- }
-
- /* Since the vast majority of control messages are console messages
- it's just easier to ignore other messages that try to bind to
- a specific type. */
- msg.type = XCS_MSG_BIND;
- msg.u.bind.port = PORT_WILDCARD;
- msg.u.bind.type = TYPE_WILDCARD;
- if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
- dolog(LOG_ERR, "xcs vind failed. Possible bug.");
- goto out_close_data;
- }
-
- return true;
-
- out_close_data:
- close(xcs_ctrl_fd);
- xcs_data_fd = -1;
- out_close_ctrl:
- close(xcs_ctrl_fd);
- xcs_ctrl_fd = -1;
- out_close_store:
- xs_daemon_close(xs);
- out:
- return false;
-}
-
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/utils.h
--- a/tools/consoled/utils.h Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,47 +0,0 @@
-/*\
- * Copyright (C) International Business Machines Corp., 2005
- * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- * Xen Console Daemon
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; under version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-\*/
-
-#ifndef CONSOLED_UTILS_H
-#define CONSOLED_UTILS_H
-
-#include <stdbool.h>
-#include <syslog.h>
-#include <stdio.h>
-
-#include "xs.h"
-
-void daemonize(const char *pidfile);
-bool xen_setup(void);
-#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true)
-#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read);
-
-extern int xcs_ctrl_fd;
-extern int xcs_data_fd;
-extern struct xs_handle *xs;
-extern int xc;
-
-#if 1
-#define dolog(val, fmt, ...) syslog(val, fmt, ## __VA_ARGS__)
-#else
-#define dolog(val, fmt, ...) fprintf(stderr, fmt "\n", ## __VA_ARGS__)
-#endif
-
-#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/xc_console.c
--- a/tools/consoled/xc_console.c Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,236 +0,0 @@
-/*\
- * Copyright (C) International Business Machines Corp., 2005
- * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- * Xen Console Daemon
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; under version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-\*/
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <time.h>
-#include <fcntl.h>
-#include <sys/wait.h>
-#include <termios.h>
-#include <signal.h>
-#include <getopt.h>
-#include <sys/select.h>
-#include <err.h>
-#include <errno.h>
-#include <pty.h>
-
-#include "xc.h"
-#include "xs.h"
-
-#define ESCAPE_CHARACTER 0x1d
-
-static volatile sig_atomic_t received_signal = 0;
-
-static void sighandler(int signum)
-{
- received_signal = 1;
-}
-
-static bool write_sync(int fd, const void *data, size_t size)
-{
- size_t offset = 0;
- ssize_t len;
-
- while (offset < size) {
- len = write(fd, data + offset, size - offset);
- if (len < 1) {
- return false;
- }
- offset += len;
- }
-
- return true;
-}
-
-static void usage(const char *program) {
- printf("Usage: %s [OPTION] DOMID\n"
- "Attaches to a virtual domain console\n"
- "\n"
- " -h, --help display this help and exit\n"
- , program);
-}
-
-/* don't worry too much if setting terminal attributes fail */
-static void init_term(int fd, struct termios *old)
-{
- struct termios new_term;
-
- if (tcgetattr(fd, old) == -1) {
- perror("tcgetattr() failed");
- return;
- }
-
- new_term = *old;
- cfmakeraw(&new_term);
-
- if (tcsetattr(fd, TCSAFLUSH, &new_term) == -1) {
- perror("tcsetattr() failed");
- }
-}
-
-static void restore_term(int fd, struct termios *old)
-{
- if (tcsetattr(fd, TCSAFLUSH, old) == -1) {
- perror("tcsetattr() failed");
- }
-}
-
-static int console_loop(int xc_handle, domid_t domid, int fd)
-{
- int ret;
-
- do {
- fd_set fds;
-
- FD_ZERO(&fds);
- FD_SET(STDIN_FILENO, &fds);
- FD_SET(fd, &fds);
-
- ret = select(fd + 1, &fds, NULL, NULL, NULL);
- if (ret == -1) {
- if (errno == EINTR || errno == EAGAIN) {
- continue;
- }
- perror("select() failed");
- return -1;
- }
-
- if (FD_ISSET(STDIN_FILENO, &fds)) {
- ssize_t len;
- char msg[60];
-
- len = read(STDIN_FILENO, msg, sizeof(msg));
- if (len == 1 && msg[0] == ESCAPE_CHARACTER) {
- return 0;
- }
-
- if (len == 0 && len == -1) {
- if (len == -1 &&
- (errno == EINTR || errno == EAGAIN)) {
- continue;
- }
- perror("select() failed");
- return -1;
- }
-
- if (!write_sync(fd, msg, len)) {
- perror("write() failed");
- return -1;
- }
- }
-
- if (FD_ISSET(fd, &fds)) {
- ssize_t len;
- char msg[512];
-
- len = read(fd, msg, sizeof(msg));
- if (len == 0 || len == -1) {
- if (len == -1 &&
- (errno == EINTR || errno == EAGAIN)) {
- continue;
- }
- perror("select() failed");
- return -1;
- }
-
- if (!write_sync(STDOUT_FILENO, msg, len)) {
- perror("write() failed");
- return -1;
- }
- }
- } while (received_signal == 0);
-
- return 0;
-}
-
-int main(int argc, char **argv)
-{
- struct termios attr;
- int domid;
- int xc_handle;
- char *sopt = "hf:pc";
- int ch;
- int opt_ind=0;
- struct option lopt[] = {
- { "help", 0, 0, 'h' },
- { "file", 1, 0, 'f' },
- { "pty", 0, 0, 'p' },
- { "ctty", 0, 0, 'c' },
- { 0 },
-
- };
- char *str_pty;
- char path[1024];
- int spty;
- unsigned int len = 0;
- struct xs_handle *xs;
-
- while((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
- switch(ch) {
- case 'h':
- usage(argv[0]);
- exit(0);
- break;
- }
- }
-
- if ((argc - optind) != 1) {
- fprintf(stderr, "Invalid number of arguments\n");
- fprintf(stderr, "Try `%s --help' for more information.\n",
- argv[0]);
- exit(EINVAL);
- }
-
- domid = atoi(argv[optind]);
-
- xs = xs_daemon_open();
- if (xs == NULL) {
- err(errno, "Could not contact XenStore");
- }
-
- xc_handle = xc_interface_open();
- if (xc_handle == -1) {
- err(errno, "xc_interface_open()");
- }
-
- signal(SIGTERM, sighandler);
-
- snprintf(path, sizeof(path), "/console/%d/tty", domid);
- str_pty = xs_read(xs, path, &len);
- if (str_pty == NULL) {
- err(errno, "Could not read tty from store");
- }
- spty = open(str_pty, O_RDWR | O_NOCTTY);
- if (spty == -1) {
- err(errno, "Could not open tty `%s'", str_pty);
- }
- free(str_pty);
-
- init_term(STDIN_FILENO, &attr);
- console_loop(xc_handle, domid, spty);
- restore_term(STDIN_FILENO, &attr);
-
- return 0;
- }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/network
--- a/tools/examples/network Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,246 +0,0 @@
-#!/bin/sh
-#============================================================================
-# Default Xen network start/stop script.
-# Xend calls a network script when it starts.
-# The script name to use is defined in /etc/xen/xend-config.sxp
-# in the network-script field.
-#
-# This script creates a bridge (default xen-br0), adds a device
-# (default eth0) to it, copies the IP addresses from the device
-# to the bridge and adjusts the routes accordingly.
-#
-# If all goes well, this should ensure that networking stays up.
-# However, some configurations are upset by this, especially
-# NFS roots. If the bridged setup does not meet your needs,
-# configure a different script, for example using routing instead.
-#
-# Usage:
-#
-# network (start|stop|status) {VAR=VAL}*
-#
-# Vars:
-#
-# bridge The bridge to use (default xen-br0).
-# netdev The interface to add to the bridge (default eth0).
-# antispoof Whether to use iptables to prevent spoofing (default yes).
-#
-# start:
-# Creates the bridge and enslaves netdev to it.
-# Copies the IP addresses from netdev to the bridge.
-# Deletes the routes to netdev and adds them on bridge.
-#
-# stop:
-# Removes netdev from the bridge.
-# Deletes the routes to bridge and adds them to netdev.
-#
-# status:
-# Print ifconfig for netdev and bridge.
-# Print routes.
-#
-#============================================================================
-
-# Exit if anything goes wrong.
-set -e
-
-# First arg is the operation.
-OP=$1
-shift
-
-# Pull variables in args in to environment.
-for arg ; do export "${arg}" ; done
-
-bridge=${bridge:-xen-br0}
-netdev=${netdev:-eth0}
-antispoof=${antispoof:-yes}
-
-echo "*network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof" >&2
-
-# Usage: transfer_addrs src dst
-# Copy all IP addresses (including aliases) from device $src to device $dst.
-transfer_addrs () {
- local src=$1
- local dst=$2
- # Don't bother if $dst already has IP addresses.
- if ip addr show dev ${dst} | egrep -q '^ *inet ' ; then
- return
- fi
- # Address lines start with 'inet' and have the device in them.
- # Replace 'inet' with 'ip addr add' and change the device name $src
- # to 'dev $src'.
- ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
-s/inet/ip addr add/
-s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+/[0-9]\+\)@\1@
-s/${src}/dev ${dst}/
-" | sh -e
- # Remove automatic routes on destionation device
- ip route list | sed -ne "
-/dev ${dst}\( \|$\)/ {
- s/^/ip route del /
- p
-}" | sh -e
-}
-
-# Usage: del_addrs src
-del_addrs () {
- local src=$1
- ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
-s/inet/ip addr del/
-s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\)/[0-9]\+@\1@
-s/${src}/dev ${src}/
-" | sh -e
-}
-
-# Usage: transfer_routes src dst
-# Get all IP routes to device $src, delete them, and
-# add the same routes to device $dst.
-# The original routes have to be deleted, otherwise adding them
-# for $dst fails (duplicate routes).
-transfer_routes () {
- local src=$1
- local dst=$2
- # List all routes and grep the ones with $src in.
- # Stick 'ip route del' on the front to delete.
- # Change $src to $dst and use 'ip route add' to add.
- ip route list | sed -ne "
-/dev ${src}\( \|$\)/ {
- h
- s/^/ip route del /
- P
- g
- s/${src}/${dst}/
- s/^/ip route add /
- P
- d
-}" | sh -e
-}
-
-# Usage: create_bridge bridge
-create_bridge () {
- local bridge=$1
-
- # Don't create the bridge if it already exists.
- if ! brctl show | grep -q ${bridge} ; then
- brctl addbr ${bridge}
- brctl stp ${bridge} off
- brctl setfd ${bridge} 0
- fi
- ifconfig ${bridge} up
-}
-
-# Usage: add_to_bridge bridge dev
-add_to_bridge () {
- local bridge=$1
- local dev=$2
- # Don't add $dev to $bridge if it's already on a bridge.
- if ! brctl show | grep -q ${dev} ; then
- brctl addif ${bridge} ${dev}
- fi
-}
-
-# Usage: antispoofing dev bridge
-# Set the default forwarding policy for $dev to drop.
-# Allow forwarding to the bridge.
-antispoofing () {
- local dev=$1
- local bridge=$2
-
- iptables -P FORWARD DROP
- iptables -A FORWARD -m physdev --physdev-in ${dev} -j ACCEPT
-}
-
-# Usage: show_status dev bridge
-# Print ifconfig and routes.
-show_status () {
- local dev=$1
- local bridge=$2
-
- echo '============================================================'
- ifconfig ${dev}
- ifconfig ${bridge}
- echo ' '
- ip route list
- echo ' '
- route -n
- echo '============================================================'
-}
-
-op_start () {
- if [ "${bridge}" == "null" ] ; then
- return
- fi
-
- create_bridge ${bridge}
-
- if ifconfig 2>/dev/null | grep -q veth0 ; then
- return
- fi
-
- if ifconfig veth0 2>/dev/null | grep -q veth0 ; then
- # Propagate MAC address and ARP responsibilities to virtual interface.
- mac=`ifconfig ${netdev} | grep HWadd | sed -e
's/.*\(..:..:..:..:..:..\).*/\1/'`
- ifconfig veth0 down
- ifconfig veth0 hw ether ${mac}
- ifconfig veth0 arp up
- transfer_addrs ${netdev} veth0
- transfer_routes ${netdev} veth0
- del_addrs ${netdev}
- ifconfig ${netdev} -arp down
- ifconfig ${netdev} hw ether fe:ff:ff:ff:ff:ff up
- # Bring up second half of virtual device and attach it to the bridge.
- ifconfig vif0.0 up
- add_to_bridge ${bridge} vif0.0
- else
- transfer_addrs ${netdev} ${bridge}
- transfer_routes ${netdev} ${bridge}
- fi
-
- # Attach the real interface to the bridge.
- add_to_bridge ${bridge} ${netdev}
-
- if [ ${antispoof} == 'yes' ] ; then
- antispoofing ${netdev} ${bridge}
- fi
-}
-
-op_stop () {
- if [ "${bridge}" == "null" ] ; then
- return
- fi
-
- brctl delif ${bridge} ${netdev}
-
- if ifconfig veth0 2>/dev/null | grep -q veth0 ; then
- brctl delif ${bridge} vif0.0
- ifconfig vif0.0 down
- mac=`ifconfig veth0 | grep HWadd | sed -e
's/.*\(..:..:..:..:..:..\).*/\1/'`
- ifconfig ${netdev} down
- ifconfig ${netdev} hw ether ${mac}
- ifconfig ${netdev} arp up
- transfer_addrs veth0 ${netdev}
- transfer_routes veth0 ${netdev}
- del_addrs veth0
- ifconfig veth0 -arp down
- ifconfig veth0 hw ether 00:00:00:00:00:00
- else
- transfer_routes ${bridge} ${netdev}
- fi
-}
-
-case ${OP} in
- start)
- op_start
- ;;
-
- stop)
- op_stop
- ;;
-
- status)
- show_status ${netdev} ${bridge}
- ;;
-
- *)
- echo 'Unknown command: ' ${OP} >&2
- echo 'Valid commands are: start, stop, status' >&2
- exit 1
-esac
diff -r 5f1ed597f107 -r 8799d14bef77
tools/ioemu/target-i386-dm/qemu-vgaram-bin.gz
--- a/tools/ioemu/target-i386-dm/qemu-vgaram-bin.gz Wed Aug 24 02:43:18 2005
+++ /dev/null Thu Aug 25 22:53:20 2005
@@ -1,7 +0,0 @@
-?ËmB qemu-vgaram-bin íÝÍk\×ÇñCòÂÁMw¢ I
-^´?XÊ?#\c?Ô¡
\ No newline at end of file
-M7okɲe?L,a9?J?oò'B×Ýô(¨1?.L?Å?´x?lJ1/?ÜçÑ}Ï?3wf?h?s7ßü¸3wFsæ?{î¹ç¾?}öpJ=ÉÉ?dAr\2'yI²&Y?\?lH??¼#ù?äE{Ϫ½Ï_×÷¿/¹bÑϾ,¹*yÊ^ßÌÞ?/yáðuy|#½vø¯òY7äõòüïé?Ã?Èë7öò?¾çP
-?~ö}Éç6ÕÜ?|?=.ÿ-«?Õõ3gϵ?^¶×Ûê¬ÿ÷=©³]©³]Y/Ítéð5ymA??ÎÛûß·:=oõ{Åæ_µïsÁ?ë÷xMò¶=Õæ?µÇÃõð°dArM²kÓ?U÷?YÉIËWYf³×j?ïmÁ×yòGÕÅìÈ÷?Ͷß%kC¾î7³vµjmCç?Xï÷GDëýfÞÊD,».ó?ä%ËJ\ݾ??ItzIru/7Óºô;'¤ÿ?ÛË'é?Ì[?×?í}öw¶{·lõ¿bóuÞE?w̶پ.¾Ëc?#??I??lInÙ
-lo^çy¼íåÛ@YÍr=aýÎU˺µ³9ËY¼ÖlzqozZêðYyí´Ì??z??ùóò·óÒ?×åñ]ißwd=}çÔ?ÿCÉr6Ís15ÛÀ??íÀuàuüyê÷?'íù?ú?J>²åº#¹+¹??¾ÿ?=¿c¯Tþ?$3©iÿº?²òO¥þv1cïü[Ý·½?íWG?Wòýï?¶½®[?ítVÚÔµ?1?äñ¦D§«ò¼'óõñ¹¬¯ýùTÛ?¦Üöò¤âqD>´væù?ä??D?c?ÉbjÆ={¾c?uÞCöüZ`´e·Èàë/IÖ$Ë?K??lþ+?×%oJ?´ùOÛTß¿.yÿÁk³é¨lëoH_{AoJV%ò|Uæ¯ÉãËòøDö9Çlª?·ùG³éÉ?äªä?Äÿö{¿æ9ÉÜÞãÙô¼?3'e<+??¶þF§úü?=~Ú>G?ã?¢Ì?evLtÿ3~4ÝÌZ{Ûµir=ó©Ùvm:?ò½¾~?²6ð?=ßÈÖý?lýÿAò²ä÷?·mÞo%/JÞ°vsFúûW$oJ~gí^_˦OYð¿]²2ÎÛkú«öÞµ÷ýBò|ö=µ|ÝNþ?}O}ÿ/MM_wߦGÃêz¸þ?ã?
\ No newline at end of file
-IOú?%ɦ,ÿ?×?ÖÙe[.?êöÿ?=~Âê@?eÓê`Ù?¿þæZê{Õ:þ:XýyßöCÚ߯e;#ÛøS{}àIé7gåõfûoÚ?NgÓÙCM¿ªmì¹Û´7?÷?ýs6ÿ?¯÷?#ß×ƵµÉõ§ûºÞÞúß?iO²$iÚ_Oê¯Ðþ¼?õ}ð§?m{>cù8õ÷½?ãÐdåkYÞïz_¸cß#ª\wøxcSòE<þÚ
*{>Ëbà2ÇÇ/kRßW÷ÆóÒ?äØ˾Ï?ä?DÇïí?áþ^ç?ýý±,û¯ßï¶ÅÛB?zñcßû©\Üþ^ß?õû?u«ß?mXÝèû.Úß½?ÍÓ1àÖ@?5ÛØë2îyW¦OÚövîÁºjú?ò3uÞ¥CýmýÝCý¾÷²=î}¯uâÉÇbµÖ?fx¼Ý]¢û=??ýßv?;þÖx;÷s~~èÏÏÁ=¨_íw·Òøã?¨ò}ÿ7ªîõµÈö???cu=?%úØW?ùQ+W÷?·RÿøÇóX`ù»VÏ?ï*/ÿB¶®½ü?"?åïë?o?y¢Û_¾¬5?¿,ïz±þµ>"Ï{ÚúùZ}ÿ¤ò£÷½m}_>.?©T×G<?NÛò?õUvÞþ½Ì¼-Ôh?ùwøO=ÚN±Û¿_wÎÏýî¦á>(ª|íïóó |