# HG changeset patch
# User adsharma@xxxxxxxxxxxxxxxxxxxx
# Node ID 1ae656509f021f7436cd6813c9b50c395d29c3bf
# Parent e3d811cca4e1d385a793cc515d72c8e671fd6267
# Parent 26c03c17c418ba106ebda01502713da2fc9d28c6
Merge.
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue Aug 16 18:09:07 2005
@@ -130,6 +130,7 @@
# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_MICROCODE=y
CONFIG_X86_CPUID=y
+CONFIG_SWIOTLB=y
#
# Firmware Drivers
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64	Tue Aug 16 18:09:07 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12.3-xen0
-# Mon Aug 15 11:36:25 2005
+# Linux kernel version: 2.6.12.4-xen0
+# Mon Aug 15 18:57:19 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -52,6 +52,7 @@
# CONFIG_IKCONFIG is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_PRINTK=y
CONFIG_BUG=y
@@ -122,6 +123,7 @@
# CONFIG_X86_MSR is not set
# CONFIG_GART_IOMMU is not set
CONFIG_DUMMY_IOMMU=y
+CONFIG_SWIOTLB=y
# CONFIG_X86_MCE is not set
#
@@ -163,6 +165,7 @@
CONFIG_STANDALONE=y
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
#
# Memory Technology Devices (MTD)
@@ -1060,7 +1063,22 @@
# Kernel hacking
#
# CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=15
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
+# CONFIG_CHECKING is not set
+# CONFIG_INIT_DEBUG is not set
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Tue Aug 16 18:09:07 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-xenU
-# Tue Aug 2 23:56:13 2005
+# Linux kernel version: 2.6.12.4-xenU
+# Mon Aug 15 19:25:22 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -30,7 +30,7 @@
#
CONFIG_EXPERIMENTAL=y
CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
#
@@ -48,8 +48,10 @@
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_PRINTK=y
CONFIG_BUG=y
@@ -74,6 +76,7 @@
CONFIG_MODVERSIONS=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
CONFIG_XENARCH="x86_64"
CONFIG_X86=y
CONFIG_MMU=y
@@ -86,12 +89,15 @@
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_GOOD_APIC=y
# CONFIG_HPET_TIMER is not set
-# CONFIG_SMP is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+# CONFIG_SCHED_SMT is not set
# CONFIG_PREEMPT is not set
# CONFIG_MICROCODE is not set
CONFIG_X86_CPUID=y
# CONFIG_NUMA is not set
# CONFIG_MTRR is not set
+CONFIG_HAVE_DEC_LOCK=y
# CONFIG_X86_LOCAL_APIC is not set
# CONFIG_X86_IO_APIC is not set
# CONFIG_PCI is not set
@@ -114,7 +120,11 @@
# CONFIG_GENERIC_CPU is not set
CONFIG_X86_L1_CACHE_BYTES=128
# CONFIG_X86_TSC is not set
+CONFIG_X86_XEN_GENAPIC=y
# CONFIG_X86_MSR is not set
+CONFIG_X86_HT=y
+# CONFIG_K8_NUMA is not set
+# CONFIG_NUMA_EMU is not set
CONFIG_DUMMY_IOMMU=y
# CONFIG_X86_MCE is not set
@@ -157,6 +167,7 @@
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
#
# Block devices
@@ -559,7 +570,6 @@
#
# Old SIR device drivers
#
-# CONFIG_IRPORT_SIR is not set
#
# Old Serial dongle support
@@ -861,17 +871,7 @@
# Security options
#
# CONFIG_KEYS is not set
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_CAPABILITIES=y
-# CONFIG_SECURITY_SECLVL is not set
-CONFIG_SECURITY_SELINUX=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
-CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_SECURITY_SELINUX_DEVELOP=y
-CONFIG_SECURITY_SELINUX_AVC_STATS=y
-CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
+# CONFIG_SECURITY is not set
#
# Cryptographic options
@@ -919,5 +919,19 @@
# Kernel hacking
#
# CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=15
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
+# CONFIG_INIT_DEBUG is not set
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32	Tue Aug 16 18:09:07 2005
@@ -137,6 +137,7 @@
# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_MICROCODE=m
CONFIG_X86_CPUID=m
+CONFIG_SWIOTLB=y
#
# Firmware Drivers
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64	Tue Aug 16 18:09:07 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12.3-xen0
-# Mon Aug 15 19:46:39 2005
+# Linux kernel version: 2.6.12.4-xen
+# Mon Aug 15 19:54:11 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -35,6 +35,7 @@
# CONFIG_CLEAN_COMPILE is not set
CONFIG_BROKEN=y
CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
#
@@ -50,8 +51,10 @@
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_PRINTK=y
CONFIG_BUG=y
@@ -76,6 +79,7 @@
# CONFIG_MODVERSIONS is not set
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
CONFIG_XENARCH="x86_64"
CONFIG_X86=y
CONFIG_MMU=y
@@ -88,12 +92,15 @@
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_GOOD_APIC=y
# CONFIG_HPET_TIMER is not set
-# CONFIG_SMP is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+# CONFIG_SCHED_SMT is not set
# CONFIG_PREEMPT is not set
CONFIG_MICROCODE=y
# CONFIG_X86_CPUID is not set
# CONFIG_NUMA is not set
# CONFIG_MTRR is not set
+CONFIG_HAVE_DEC_LOCK=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_PCI=y
@@ -120,8 +127,12 @@
# CONFIG_X86_TSC is not set
CONFIG_X86_XEN_GENAPIC=y
# CONFIG_X86_MSR is not set
+CONFIG_X86_HT=y
+# CONFIG_K8_NUMA is not set
+# CONFIG_NUMA_EMU is not set
# CONFIG_GART_IOMMU is not set
CONFIG_DUMMY_IOMMU=y
+CONFIG_SWIOTLB=y
# CONFIG_X86_MCE is not set
#
@@ -163,6 +174,7 @@
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
#
# Memory Technology Devices (MTD)
@@ -214,7 +226,6 @@
CONFIG_MTD_ROM=m
CONFIG_MTD_ABSENT=m
# CONFIG_MTD_OBSOLETE_CHIPS is not set
-# CONFIG_MTD_XIP is not set
#
# Mapping drivers for chip access
@@ -2395,7 +2406,21 @@
# Kernel hacking
#
# CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=15
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
+# CONFIG_INIT_DEBUG is not set
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Tue Aug 16 18:09:07 2005
@@ -533,6 +533,11 @@
with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
/dev/cpu/31/cpuid.
+config SWIOTLB
+ bool
+ depends on PCI
+ default y
+
source "drivers/firmware/Kconfig"
choice
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Tue Aug 16 18:09:07 2005
@@ -44,6 +44,7 @@
c-obj-$(CONFIG_EFI) += efi.o efi_stub.o
c-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
+c-obj-$(CONFIG_SWIOTLB) += swiotlb.o
EXTRA_AFLAGS := -traditional
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Tue Aug 16 18:09:07 2005
@@ -23,6 +23,103 @@
int flags;
unsigned long *bitmap;
};
+
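+/*
+ * Without swiotlb there is no way to bounce a buffer that the device
+ * cannot reach, so an unmappable DMA address is fatal: report it and
+ * ask the user to boot with 'swiotlb=force'.
+ */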
+static void iommu_bug(void)
+{
+ printk(KERN_ALERT "Fatal DMA error! Please use 'swiotlb=force'\n");
+ BUG();
+}
+
+#define IOMMU_BUG_ON(test) do { if (unlikely(test)) iommu_bug(); } while(0)
+
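+/*
+ * With swiotlb, scatterlist entries are bounced as necessary; without
+ * it they are used in place and must already be reachable by the
+ * device (see IOMMU_BUG_ON above).
+ */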
+int
+dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ int i, rc;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (swiotlb) {
+ rc = swiotlb_map_sg(hwdev, sg, nents, direction);
+ } else {
+ for (i = 0; i < nents; i++ ) {
+ sg[i].dma_address =
+ page_to_phys(sg[i].page) + sg[i].offset;
+ sg[i].dma_length = sg[i].length;
+ BUG_ON(!sg[i].page);
+ IOMMU_BUG_ON(address_needs_mapping(
+ hwdev, sg[i].dma_address));
+ }
+ rc = nents;
+ }
+
+ flush_write_buffers();
+ return rc;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+void
+dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+ if (swiotlb)
+ swiotlb_unmap_sg(hwdev, sg, nents, direction);
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction direction)
+{
+ dma_addr_t dma_addr;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (swiotlb) {
+ dma_addr = swiotlb_map_page(
+ dev, page, offset, size, direction);
+ } else {
+ dma_addr = page_to_phys(page) + offset;
+ IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
+ }
+
+ return dma_addr;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+ if (swiotlb)
+ swiotlb_unmap_page(dev, dma_address, size, direction);
+}
+EXPORT_SYMBOL(dma_unmap_page);
+
+int
+dma_mapping_error(dma_addr_t dma_addr)
+{
+ if (swiotlb)
+ return swiotlb_dma_mapping_error(dma_addr);
+ return 0;
+}
+EXPORT_SYMBOL(dma_mapping_error);
+
+int
+dma_supported(struct device *dev, u64 mask)
+{
+ if (swiotlb)
+ return swiotlb_dma_supported(dev, mask);
+ /*
+ * By default we'll BUG when an infeasible DMA is requested, and
+ * request swiotlb=force (see IOMMU_BUG_ON).
+ */
+ return 1;
+}
+EXPORT_SYMBOL(dma_supported);
void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, unsigned int __nocast gfp)
@@ -54,13 +151,14 @@
ret = (void *)vstart;
if (ret != NULL) {
- xen_contig_memory(vstart, order);
+ xen_create_contiguous_region(vstart, order);
memset(ret, 0, size);
*dma_handle = virt_to_bus(ret);
}
return ret;
}
+EXPORT_SYMBOL(dma_alloc_coherent);
void dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle)
@@ -72,9 +170,12 @@
int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
bitmap_release_region(mem->bitmap, page, order);
- } else
+ } else {
+ xen_destroy_contiguous_region((unsigned long)vaddr, order);
free_pages((unsigned long)vaddr, order);
-}
+ }
+}
+EXPORT_SYMBOL(dma_free_coherent);
int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
dma_addr_t device_addr, size_t size, int flags)
@@ -153,46 +254,20 @@
}
EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-static LIST_HEAD(dma_map_head);
-static DEFINE_SPINLOCK(dma_map_lock);
-struct dma_map_entry {
- struct list_head list;
- dma_addr_t dma;
- char *bounce, *host;
- size_t size;
-};
-#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d)))
-
dma_addr_t
dma_map_single(struct device *dev, void *ptr, size_t size,
enum dma_data_direction direction)
{
- struct dma_map_entry *ent;
- void *bnc;
dma_addr_t dma;
- unsigned long flags;
-
- BUG_ON(direction == DMA_NONE);
-
- /*
- * Even if size is sub-page, the buffer may still straddle a page
- * boundary. Take into account buffer start offset. All other calls are
- * conservative and always search the dma_map list if it's non-empty.
- */
- if ((((unsigned int)ptr & ~PAGE_MASK) + size) <= PAGE_SIZE) {
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (swiotlb) {
+ dma = swiotlb_map_single(dev, ptr, size, direction);
+ } else {
dma = virt_to_bus(ptr);
- } else {
- BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, GFP_ATOMIC)) == NULL);
- BUG_ON((ent = kmalloc(sizeof(*ent), GFP_ATOMIC)) == NULL);
- if (direction != DMA_FROM_DEVICE)
- memcpy(bnc, ptr, size);
- ent->dma = dma;
- ent->bounce = bnc;
- ent->host = ptr;
- ent->size = size;
- spin_lock_irqsave(&dma_map_lock, flags);
- list_add(&ent->list, &dma_map_head);
- spin_unlock_irqrestore(&dma_map_lock, flags);
+ IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
+ IOMMU_BUG_ON(address_needs_mapping(dev, dma));
}
flush_write_buffers();
@@ -204,30 +279,9 @@
dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction direction)
{
- struct dma_map_entry *ent;
- unsigned long flags;
-
- BUG_ON(direction == DMA_NONE);
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list ) {
- if (DMA_MAP_MATCHES(ent, dma_addr)) {
- list_del(&ent->list);
- break;
- }
- }
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- BUG_ON(dma_addr != ent->dma);
- BUG_ON(size != ent->size);
- if (direction != DMA_TO_DEVICE)
- memcpy(ent->host, ent->bounce, size);
- dma_free_coherent(dev, size, ent->bounce, ent->dma);
- kfree(ent);
- }
- }
+ BUG_ON(direction == DMA_NONE);
+ if (swiotlb)
+ swiotlb_unmap_single(dev, dma_addr, size, direction);
}
EXPORT_SYMBOL(dma_unmap_single);
@@ -235,23 +289,8 @@
dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
enum dma_data_direction direction)
{
- struct dma_map_entry *ent;
- unsigned long flags, off;
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list )
- if (DMA_MAP_MATCHES(ent, dma_handle))
- break;
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- off = dma_handle - ent->dma;
- BUG_ON((off + size) > ent->size);
- /*if (direction != DMA_TO_DEVICE)*/
- memcpy(ent->host+off, ent->bounce+off, size);
- }
- }
+ if (swiotlb)
+ swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_for_cpu);
@@ -259,24 +298,17 @@
dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
enum dma_data_direction direction)
{
- struct dma_map_entry *ent;
- unsigned long flags, off;
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list )
- if (DMA_MAP_MATCHES(ent, dma_handle))
- break;
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- off = dma_handle - ent->dma;
- BUG_ON((off + size) > ent->size);
- /*if (direction != DMA_FROM_DEVICE)*/
- memcpy(ent->bounce+off, ent->host+off, size);
- }
- }
-
- flush_write_buffers();
+ if (swiotlb)
+ swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_for_device);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Tue Aug 16 18:09:07 2005
@@ -540,16 +540,13 @@
EXPORT_SYMBOL(profile_pc);
#endif
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static inline void do_timer_interrupt(int irq, void *dev_id,
- struct pt_regs *regs)
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
s64 delta, delta_cpu;
int cpu = smp_processor_id();
struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+
+ write_seqlock(&xtime_lock);
do {
get_time_values_from_xen();
@@ -572,7 +569,6 @@
for (cpu = 0; cpu < num_online_cpus(); cpu++)
printk(" %d: %lld\n", cpu,
per_cpu(processed_system_time, cpu));
- return;
}
/* System-wide jiffy work. */
@@ -582,7 +578,18 @@
do_timer(regs);
}
- /* Local CPU jiffy work. */
+ if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
+ update_wallclock();
+ clock_was_set();
+ }
+
+ write_sequnlock(&xtime_lock);
+
+ /*
+ * Local CPU jiffy work. There is no need to hold xtime_lock here, and
+ * holding it may even risk deadlock (update_process_times may do
+ * scheduler rebalancing work and thus acquire runqueue locks).
+ */
while (delta_cpu >= NS_PER_TICK) {
delta_cpu -= NS_PER_TICK;
per_cpu(processed_system_time, cpu) += NS_PER_TICK;
@@ -590,29 +597,6 @@
profile_tick(CPU_PROFILING, regs);
}
- if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
- update_wallclock();
- clock_was_set();
- }
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
- do_timer_interrupt(irq, NULL, regs);
- write_sequnlock(&xtime_lock);
return IRQ_HANDLED;
}
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c	Tue Aug 16 18:09:07 2005
@@ -263,12 +263,9 @@
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void xen_contig_memory(unsigned long vstart, unsigned int order)
-{
- /*
- * Ensure multi-page extents are contiguous in machine memory. This code
- * could be cleaned up some, and the number of hypercalls reduced.
- */
+/* Ensure multi-page extents are contiguous in machine memory. */
+void xen_create_contiguous_region(unsigned long vstart, unsigned int order)
+{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
@@ -312,6 +309,49 @@
balloon_unlock(flags);
}
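+/*
+ * Undo xen_create_contiguous_region(): return the machine-contiguous
+ * extent to Xen and back the region with arbitrary machine pages again.
+ */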
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long mfn, i, flags;
+
+ scrub_pages(vstart, 1 << order);
+
+ balloon_lock(flags);
+
+ /* 1. Zap current PTEs, giving away the underlying pages. */
+ for (i = 0; i < (1<<order); i++) {
+ pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
+ pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
+ pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
+ pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
+ mfn = pte_mfn(*pte);
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
+ phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
+ INVALID_P2M_ENTRY;
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+ }
+
+ /* 2. Map new pages in place of old pages. */
+ for (i = 0; i < (1<<order); i++) {
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_increase_reservation, &mfn, 1, 0) != 1);
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ vstart + (i*PAGE_SIZE),
+ __pte_ma((mfn<<PAGE_SHIFT)|__PAGE_KERNEL), 0));
+ xen_machphys_update(mfn, (__pa(vstart)>>PAGE_SHIFT)+i);
+ phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn;
+ }
+
+ flush_tlb_all();
+
+ balloon_unlock(flags);
+}
+
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
unsigned long allocate_empty_lowmem_region(unsigned long pages)
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 16 18:09:07 2005
@@ -41,6 +41,12 @@
#include <asm/sections.h>
#include <asm-xen/hypervisor.h>
+#if defined(CONFIG_SWIOTLB)
+extern void swiotlb_init(void);
+int swiotlb;
+EXPORT_SYMBOL(swiotlb);
+#endif
+
unsigned int __VMALLOC_RESERVE = 128 << 20;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -630,6 +636,10 @@
int tmp;
int bad_ppro;
unsigned long pfn;
+
+#if defined(CONFIG_SWIOTLB)
+ swiotlb_init();
+#endif
#ifndef CONFIG_DISCONTIGMEM
if (!mem_map)
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 16 18:09:07 2005
@@ -332,10 +332,10 @@
for (i = 0; i < size; i += PAGE_SIZE) {
if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
/* Fill in the PTE pointers. */
- generic_page_range(mm, start_address,
- address-start_address,
- direct_remap_area_pte_fn, &w);
-
+ generic_page_range(mm, start_address,
+ address - start_address,
+ direct_remap_area_pte_fn, &w);
+ w = u;
if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
return -EFAULT;
v = u;
@@ -355,9 +355,8 @@
if (v != u) {
/* get the ptep's filled in */
- generic_page_range(mm, start_address,
- address-start_address,
- direct_remap_area_pte_fn, &w);
+ generic_page_range(mm, start_address, address - start_address,
+ direct_remap_area_pte_fn, &w);
if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
return -EFAULT;
}
@@ -370,32 +369,34 @@
EXPORT_SYMBOL(direct_remap_area_pages);
int create_lookup_pte_addr(struct mm_struct *mm,
- unsigned long address,
- unsigned long *ptep)
-{
- int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
- {
- unsigned long *ptep = (unsigned long *)data;
- if (ptep) *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT)
- | ((unsigned long)pte & ~PAGE_MASK);
- return 0;
- }
-
- return generic_page_range(mm, address, PAGE_SIZE, f, ptep);
+ unsigned long address,
+ unsigned long *ptep)
+{
+ int f(pte_t *pte, struct page *pte_page, unsigned long addr,
+ void *data) {
+ unsigned long *ptep = (unsigned long *)data;
+ if (ptep)
+ *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) <<
+ PAGE_SHIFT) |
+ ((unsigned long)pte & ~PAGE_MASK);
+ return 0;
+ }
+
+ return generic_page_range(mm, address, PAGE_SIZE, f, ptep);
}
EXPORT_SYMBOL(create_lookup_pte_addr);
int touch_pte_range(struct mm_struct *mm,
- unsigned long address,
- unsigned long size)
-{
- int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
- {
- return 0;
- }
-
- return generic_page_range(mm, address, size, f, NULL);
+ unsigned long address,
+ unsigned long size)
+{
+ int f(pte_t *pte, struct page *pte_page, unsigned long addr,
+ void *data) {
+ return 0;
+ }
+
+ return generic_page_range(mm, address, size, f, NULL);
}
EXPORT_SYMBOL(touch_pte_range);
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Tue Aug 16 18:09:07 2005
@@ -277,7 +277,7 @@
#ifdef CONFIG_X86_PAE
/* this gives us a page below 4GB */
- xen_contig_memory((unsigned long)pgd, 0);
+ xen_create_contiguous_region((unsigned long)pgd, 0);
#endif
if (!HAVE_SHARED_KERNEL_PMD)
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig Tue Aug 16 18:09:07 2005
@@ -329,12 +329,12 @@
# need this always enabled with GART_IOMMU for the VIA workaround
config SWIOTLB
bool
- depends on GART_IOMMU
+ depends on PCI
default y
config DUMMY_IOMMU
bool
- depends on !GART_IOMMU && !SWIOTLB
+ depends on !GART_IOMMU
default y
help
Don't use IOMMU code. This will cause problems when you have more than 4GB
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Tue Aug 16 18:09:07 2005
@@ -36,8 +36,9 @@
#obj-$(CONFIG_CPU_FREQ) += cpufreq/
#obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
#obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
-obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
-#obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
+i386-obj-$(CONFIG_DUMMY_IOMMU) += pci-dma.o
+i386-obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
@@ -49,7 +50,7 @@
bootflag-y += ../../../i386/kernel/bootflag.o
cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../../i386/kernel/cpuid.o
topology-y += ../../../i386/mach-default/topology.o
-swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o
+#swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../../i386/kernel/microcode.o
intel_cacheinfo-y += ../../../i386/kernel/cpu/intel_cacheinfo.o
quirks-y += ../../i386/kernel/quirks.o
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c	Tue Aug 16 18:09:07 2005
@@ -61,6 +61,7 @@
EXPORT_SYMBOL(dma_free_coherent);
#endif
+#if 0
int dma_supported(struct device *hwdev, u64 mask)
{
/*
@@ -76,6 +77,7 @@
return 1;
}
EXPORT_SYMBOL(dma_supported);
+#endif
int dma_get_cache_alignment(void)
{
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 16 18:09:07 2005
@@ -42,10 +42,6 @@
#ifndef Dprintk
#define Dprintk(x...)
-#endif
-
-#ifdef CONFIG_GART_IOMMU
-extern int swiotlb;
#endif
extern char _stext[];
@@ -790,8 +786,6 @@
return 1;
}
-extern int swiotlb_force;
-
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
kcore_vsyscall;
@@ -800,14 +794,9 @@
int codesize, reservedpages, datasize, initsize;
int tmp;
-#ifdef CONFIG_SWIOTLB
- if (swiotlb_force)
- swiotlb = 1;
- if (!iommu_aperture &&
- (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu))
- swiotlb = 1;
- if (swiotlb)
- swiotlb_init();
+#if defined(CONFIG_SWIOTLB)
+ extern void swiotlb_init(void);
+ swiotlb_init();
#endif
/* How many end-of-memory variables you have, grandma! */
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Aug 16 18:09:07 2005
@@ -23,6 +23,9 @@
blkif_be_driver_status_t be_st;
printk(KERN_INFO "Initialising Xen block tap device\n");
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ printk(KERN_INFO "Block tap is using grant tables.\n");
+#endif
DPRINTK(" tap - Backend connection init:\n");
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Tue Aug 16 18:09:07 2005
@@ -85,6 +85,11 @@
spinlock_t blk_ring_lock;
atomic_t refcnt;
struct work_struct work;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ u16 shmem_handle;
+ memory_t shmem_vaddr;
+ grant_ref_t shmem_ref;
+#endif
} blkif_t;
blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c	Tue Aug 16 18:09:07 2005
@@ -9,6 +9,7 @@
*/
#include "blktap.h"
+#include <asm-xen/evtchn.h>
static char *blkif_state_name[] = {
[BLKIF_STATE_CLOSED] = "closed",
@@ -48,12 +49,21 @@
blkif_t *blkif = (blkif_t *)arg;
ctrl_msg_t cmsg;
blkif_be_disconnect_t disc;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ struct gnttab_unmap_grant_ref op;
+#endif
/*
* These can't be done in blkif_disconnect() because at that point there
* may be outstanding requests at the disc whose asynchronous responses
* must still be notified to the remote driver.
*/
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ op.host_addr = blkif->shmem_vaddr;
+ op.handle = blkif->shmem_handle;
+ op.dev_bus_addr = 0;
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+#endif
vfree(blkif->blk_ring.sring);
/* Construct the deferred response message. */
@@ -177,8 +187,12 @@
unsigned int evtchn = connect->evtchn;
unsigned long shmem_frame = connect->shmem_frame;
struct vm_struct *vma;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int ref = connect->shmem_ref;
+#else
pgprot_t prot;
int error;
+#endif
blkif_t *blkif;
blkif_sring_t *sring;
@@ -199,24 +213,46 @@
return;
}
- prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+ prot = __pgprot(_KERNPG_TABLE);
error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
prot, domid);
if ( error != 0 )
{
- WPRINTK("BE_CONNECT: error! (%d)\n", error);
if ( error == -ENOMEM )
connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- else if ( error == -EFAULT ) {
+ else if ( error == -EFAULT )
connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
- WPRINTK("BE_CONNECT: MAPPING error!\n");
- }
else
connect->status = BLKIF_BE_STATUS_ERROR;
vfree(vma->addr);
return;
}
+#else
+ { /* Map: Use the Grant table reference */
+ struct gnttab_map_grant_ref op;
+ op.host_addr = VMALLOC_VMADDR(vma->addr);
+ op.flags = GNTMAP_host_map;
+ op.ref = ref;
+ op.dom = domid;
+
+ BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+
+ handle = op.handle;
+
+ if (op.handle < 0) {
+ DPRINTK(" Grant table operation failure !\n");
+ connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+ vfree(vma->addr);
+ return;
+ }
+
+ blkif->shmem_ref = ref;
+ blkif->shmem_handle = handle;
+ blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr);
+ }
+#endif
if ( blkif->status != DISCONNECTED )
{
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c	Tue Aug 16 18:09:07 2005
@@ -21,6 +21,9 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xen-public/grant_table.h>
+#endif
#include "blktap.h"
@@ -42,6 +45,7 @@
/* local prototypes */
static int blktap_read_fe_ring(void);
static int blktap_read_be_ring(void);
+
/* -------[ mmap region ]--------------------------------------------- */
/*
@@ -73,7 +77,28 @@
((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
((_seg) * PAGE_SIZE))
-
+/* -------[ grant handles ]------------------------------------------- */
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+/* When grant tables are used to map a frame for device access, the
+ * handle returned by the map operation must later be used to unmap the
+ * frame; this drops the reference count on the frame.
+ */
+struct grant_handle_pair
+{
+ u16 kernel;
+ u16 user;
+};
+static struct grant_handle_pair pending_grant_handles[MMAP_PAGES];
+#define pending_handle(_idx, _i) \
+ (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
+#define BLKTAP_INVALID_HANDLE(_g) \
+ (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF))
+#define BLKTAP_INVALIDATE_HANDLE(_g) do { \
+ (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
+ } while(0)
+
+#endif
/* -------[ blktap vm ops ]------------------------------------------- */
@@ -348,9 +373,43 @@
/*-----[ Data to/from user space ]----------------------------------------*/
-
static void fast_flush_area(int idx, int nr_pages)
{
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+ unsigned int i, op = 0;
+ struct grant_handle_pair *handle;
+ unsigned long ptep;
+
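+ /* Each mapped frame carries two grant handles, one for the kernel
+ * mapping and one for the user mapping; both must be unmapped to
+ * release the frame. */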
+ for (i=0; i<nr_pages; i++)
+ {
+ handle = &pending_handle(idx, i);
+ if (!BLKTAP_INVALID_HANDLE(handle))
+ {
+
+ unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i);
+ unmap[op].dev_bus_addr = 0;
+ unmap[op].handle = handle->kernel;
+ op++;
+
+ if (create_lookup_pte_addr(blktap_vma->vm_mm,
+ MMAP_VADDR(user_vstart, idx, i),
+ &ptep) !=0) {
+ DPRINTK("Couldn't get a pte addr!\n");
+ return;
+ }
+ unmap[op].host_addr = ptep;
+ unmap[op].dev_bus_addr = 0;
+ unmap[op].handle = handle->user;
+ op++;
+
+ BLKTAP_INVALIDATE_HANDLE(handle);
+ }
+ }
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, unmap, op)))
+ BUG();
+#else
multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int i;
@@ -363,21 +422,22 @@
mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
BUG();
-}
-
-
-extern int __direct_remap_area_pages(struct mm_struct *mm,
- unsigned long address,
- unsigned long size,
- mmu_update_t *v);
+#endif
+}
+
int blktap_write_fe_ring(blkif_request_t *req)
{
blkif_request_t *target;
- int i;
+ int i, ret = 0;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+ int op;
+#else
unsigned long remap_prot;
multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1];
mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#endif
/*
* This is called to pass a request from the real frontend domain's
@@ -394,18 +454,109 @@
return 0;
}
- remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
flush_cache_all(); /* a noop on intel... */
target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt);
memcpy(target, req, sizeof(*req));
/* Map the foreign pages directly in to the application */
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ op = 0;
+ for (i=0; i<target->nr_segments; i++) {
+
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long ptep;
+
+ uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i);
+ kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
+
+ /* Map the remote page to kernel. */
+ map[op].host_addr = kvaddr;
+ map[op].dom = ID_TO_DOM(req->id);
+ map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]);
+ map[op].flags = GNTMAP_host_map;
+ /* This needs a bit more thought in terms of interposition:
+ * If we want to be able to modify pages during write using
+ * grant table mappings, the guest will either need to allow
+ * it, or we'll need to incur a copy. */
+ if (req->operation == BLKIF_OP_WRITE)
+ map[op].flags |= GNTMAP_readonly;
+ op++;
+
+ /* Now map it to user. */
+ ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
+ if (ret)
+ {
+ DPRINTK("Couldn't get a pte addr!\n");
+ goto fail;
+ }
+
+ map[op].host_addr = ptep;
+ map[op].dom = ID_TO_DOM(req->id);
+ map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]);
+ map[op].flags = GNTMAP_host_map | GNTMAP_application_map
+ | GNTMAP_contains_pte;
+ /* Above interposition comment applies here as well. */
+ if (req->operation == BLKIF_OP_WRITE)
+ map[op].flags |= GNTMAP_readonly;
+ op++;
+ }
+
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, map, op)))
+ BUG();
+
+ op = 0;
+ for (i=0; i<(target->nr_segments*2); i+=2) {
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long offset;
+ int cancel = 0;
+
+ uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i/2);
+ kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i/2);
+
+ if ( unlikely(map[i].handle < 0) ) {
+ DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle);
+ ret = map[i].handle;
+ cancel = 1;
+ }
+
+ if ( unlikely(map[i+1].handle < 0) ) {
+ DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle);
+ ret = map[i+1].handle;
+ cancel = 1;
+ }
+
+ if (cancel)
+ goto fail;
+
+ /* Set the necessary mappings in p2m and in the VM_FOREIGN
+ * vm_area_struct to allow user vaddr -> struct page lookups
+ * to work. This is needed for direct IO to foreign pages. */
+ phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] =
+ FOREIGN_FRAME(map[i].dev_bus_addr);
+
+ offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+ ((struct page **)blktap_vma->vm_private_data)[offset] =
+ pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+
+ /* Save handles for unmapping later. */
+ pending_handle(ID_TO_IDX(req->id), i/2).kernel = map[i].handle;
+ pending_handle(ID_TO_IDX(req->id), i/2).user = map[i+1].handle;
+ }
+
+#else
+
+ remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
+
for (i=0; i<target->nr_segments; i++) {
unsigned long buf;
unsigned long uvaddr;
unsigned long kvaddr;
unsigned long offset;
+ unsigned long ptep;
buf = target->frame_and_sects[i] & PAGE_MASK;
uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i);
@@ -421,10 +572,14 @@
phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] =
FOREIGN_FRAME(buf >> PAGE_SHIFT);
- __direct_remap_area_pages(blktap_vma->vm_mm,
- uvaddr,
- PAGE_SIZE,
- &mmu[i]);
+ ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
+ if (ret)
+ {
+ DPRINTK("error getting pte\n");
+ goto fail;
+ }
+
+ mmu[i].ptr = ptep;
mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK)
| pgprot_val(blktap_vma->vm_page_prot);
@@ -448,16 +603,17 @@
if ( unlikely(mcl[i].result != 0) )
{
DPRINTK("invalid buffer -- could not remap it\n");
- fast_flush_area(ID_TO_IDX(req->id), target->nr_segments);
- return -1;
+ ret = mcl[i].result;
+ goto fail;
}
}
if ( unlikely(mcl[i].result != 0) )
{
DPRINTK("direct remapping of pages to /dev/blktap failed.\n");
- return -1;
- }
-
+ ret = mcl[i].result;
+ goto fail;
+ }
+#endif /* CONFIG_XEN_BLKDEV_GRANT */
/* Mark mapped pages as reserved: */
for ( i = 0; i < target->nr_segments; i++ )
@@ -472,6 +628,10 @@
blktap_ufe_ring.req_prod_pvt++;
return 0;
+
+ fail:
+ fast_flush_area(ID_TO_IDX(req->id), target->nr_segments);
+ return ret;
}
int blktap_write_be_ring(blkif_response_t *rsp)
@@ -538,11 +698,10 @@
map[offset] = NULL;
}
-
+ fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
zap_page_range(blktap_vma,
MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0),
ar->nr_pages << PAGE_SHIFT, NULL);
- fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
write_resp_to_fe_ring(blkif, resp_s);
blktap_ufe_ring.rsp_cons = i + 1;
kick_fe_domain(blkif);
@@ -616,10 +775,16 @@
int blktap_init(void)
{
- int err;
+ int err, i, j;
if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
BUG();
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ for (i=0; i<MAX_PENDING_REQS ; i++)
+ for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
+ BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
+#endif
err = misc_register(&blktap_miscdev);
if ( err != 0 )
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h	Tue Aug 16 18:09:07 2005
@@ -1,11 +1,33 @@
#ifndef _ASM_I386_DMA_MAPPING_H
#define _ASM_I386_DMA_MAPPING_H
+/*
+ * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
+ * documentation.
+ */
+
+#include <linux/config.h>
#include <linux/mm.h>
-
#include <asm/cache.h>
#include <asm/io.h>
#include <asm/scatterlist.h>
+#include <asm-i386/swiotlb.h>
+
+static inline int
+address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+{
+ dma_addr_t mask = 0xffffffff;
+ /* If the device has a mask, use it, otherwise default to 32 bits */
+ if (hwdev && hwdev->dma_mask)
+ mask = *hwdev->dma_mask;
+ return (addr & ~mask) != 0;
+}
+
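+/*
+ * Under Xen, pages that are contiguous in pseudo-physical memory need
+ * not be contiguous in machine memory, so a buffer crossing a page
+ * boundary cannot simply be handed to a device by physical address.
+ */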
+static inline int
+range_straddles_page_boundary(void *p, size_t size)
+{
+ return ((((unsigned long)p & ~PAGE_MASK) + size) > PAGE_SIZE);
+}
#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
@@ -24,46 +46,18 @@
dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction direction);
-static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- int i;
+extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction);
+extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction);
- BUG_ON(direction == DMA_NONE);
+extern dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction direction);
- for (i = 0; i < nents; i++ ) {
- BUG_ON(!sg[i].page);
-
- sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
- }
-
- flush_write_buffers();
- return nents;
-}
-
-static inline dma_addr_t
-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
- size_t size, enum dma_data_direction direction)
-{
- BUG_ON(direction == DMA_NONE);
- return page_to_phys(page) + offset;
-}
-
-static inline void
+extern void
dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
- enum dma_data_direction direction)
-{
- BUG_ON(direction == DMA_NONE);
-}
-
-
-static inline void
-dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
- enum dma_data_direction direction)
-{
- BUG_ON(direction == DMA_NONE);
-}
+ enum dma_data_direction direction);
extern void
dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
@@ -93,34 +87,25 @@
dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
enum dma_data_direction direction)
{
+ if (swiotlb)
+ swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction);
+ flush_write_buffers();
}
static inline void
dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
enum dma_data_direction direction)
{
+ if (swiotlb)
+ swiotlb_sync_sg_for_device(dev,sg,nelems,direction);
flush_write_buffers();
}
-static inline int
-dma_mapping_error(dma_addr_t dma_addr)
-{
- return 0;
-}
+extern int
+dma_mapping_error(dma_addr_t dma_addr);
-static inline int
-dma_supported(struct device *dev, u64 mask)
-{
- /*
- * we fall back to GFP_DMA when the mask isn't all 1s,
- * so we can't guarantee allocations that must be
- * within a tighter range than GFP_DMA..
- */
- if(mask < 0x00ffffff)
- return 0;
-
- return 1;
-}
+extern int
+dma_supported(struct device *dev, u64 mask);
static inline int
dma_set_mask(struct device *dev, u64 mask)
@@ -133,6 +118,7 @@
return 0;
}
+#ifdef __i386__
static inline int
dma_get_cache_alignment(void)
{
@@ -140,6 +126,9 @@
* maximum possible, to be safe */
return (1 << L1_CACHE_SHIFT_MAX);
}
+#else
+extern int dma_get_cache_alignment(void);
+#endif
#define dma_is_consistent(d) (1)
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h	Tue Aug 16 18:09:07 2005
@@ -43,11 +43,8 @@
struct pci_dev;
-/* The PCI address space does equal the physical memory
- * address space. The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (1)
+/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */
+#define PCI_DMA_BUS_IS_PHYS (0)
/* pci_unmap_{page,single} is a nop so... */
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h	Tue Aug 16 18:09:07 2005
@@ -1,89 +1,1 @@
-#ifndef _X8664_DMA_MAPPING_H
-#define _X8664_DMA_MAPPING_H 1
-
-/*
- * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
- * documentation.
- */
-
-#include <linux/config.h>
-
-#include <asm/scatterlist.h>
-#include <asm/io.h>
-#include <asm/swiotlb.h>
-
-extern dma_addr_t bad_dma_address;
-#define dma_mapping_error(x) \
- (swiotlb ? swiotlb_dma_mapping_error(x) : ((x) == bad_dma_address))
-
-void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
- unsigned gfp);
-void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle);
-
-extern dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size,
- enum dma_data_direction direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
- enum dma_data_direction direction);
-
-#define dma_map_page(dev,page,offset,size,dir) \
- dma_map_single((dev), page_address(page)+(offset), (size), (dir))
-
-extern void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction);
-
-extern void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction);
-
-static inline void dma_sync_sg_for_cpu(struct device *hwdev,
- struct scatterlist *sg,
- int nelems, int direction)
-{
- if (direction == DMA_NONE)
- out_of_line_bug();
-
- if (swiotlb)
- return swiotlb_sync_sg_for_cpu(hwdev,sg,nelems,direction);
-
- flush_write_buffers();
-}
-
-static inline void dma_sync_sg_for_device(struct device *hwdev,
- struct scatterlist *sg,
- int nelems, int direction)
-{
- if (direction == DMA_NONE)
- out_of_line_bug();
-
- if (swiotlb)
- return swiotlb_sync_sg_for_device(hwdev,sg,nelems,direction);
-
- flush_write_buffers();
-}
-
-extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
-extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
-
-#define dma_unmap_page dma_unmap_single
-
-extern int dma_supported(struct device *hwdev, u64 mask);
-extern int dma_get_cache_alignment(void);
-#define dma_is_consistent(h) 1
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
- *dev->dma_mask = mask;
- return 0;
-}
-
-static inline void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir)
-{
- flush_write_buffers();
-}
-#endif
+#include <asm-i386/dma-mapping.h>
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h	Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h	Tue Aug 16 18:09:07 2005
@@ -79,7 +79,9 @@
#else
/* No IOMMU */
-#define PCI_DMA_BUS_IS_PHYS 1
+/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */
+#define PCI_DMA_BUS_IS_PHYS (0)
+
#define pci_dac_dma_supported(pci_dev, mask) 1
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 18:09:07 2005
@@ -134,7 +134,8 @@
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
#endif /* linux < 2.6.0 */
-void xen_contig_memory(unsigned long vstart, unsigned int order);
+void xen_create_contiguous_region(unsigned long vstart, unsigned int order);
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
/* Allocate a contiguous empty region of low memory. Return virtual start. */
diff -r e3d811cca4e1 -r 1ae656509f02 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Tue Aug 16 04:15:23 2005
+++ b/tools/console/daemon/io.c Tue Aug 16 18:09:07 2005
@@ -87,6 +87,7 @@
{
int domid;
int tty_fd;
+ bool is_dead;
struct buffer buffer;
struct domain *next;
};
@@ -156,10 +157,12 @@
dom->domid = domid;
dom->tty_fd = domain_create_tty(dom);
+ dom->is_dead = false;
dom->buffer.data = 0;
dom->buffer.size = 0;
dom->buffer.capacity = 0;
dom->buffer.max_capacity = 0;
+ dom->next = 0;
dolog(LOG_DEBUG, "New domain %d", domid);
@@ -206,6 +209,16 @@
}
}
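+/*
+ * Domains are only flagged is_dead while the select() loop is walking
+ * the list; they are reaped here, after the walk, so the list is never
+ * modified mid-iteration.
+ */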
+static void remove_dead_domains(struct domain *dom)
+{
+ if (dom == NULL) return;
+ remove_dead_domains(dom->next);
+
+ if (dom->is_dead) {
+ remove_domain(dom);
+ }
+}
+
static void handle_tty_read(struct domain *dom)
{
ssize_t len;
@@ -224,7 +237,7 @@
if (domain_is_valid(dom->domid)) {
dom->tty_fd = domain_create_tty(dom);
} else {
- remove_domain(dom);
+ dom->is_dead = true;
}
} else if (domain_is_valid(dom->domid)) {
msg.u.control.msg.length = len;
@@ -235,7 +248,7 @@
}
} else {
close(dom->tty_fd);
- remove_domain(dom);
+ dom->is_dead = true;
}
}
@@ -250,7 +263,7 @@
if (domain_is_valid(dom->domid)) {
dom->tty_fd = domain_create_tty(dom);
} else {
- remove_domain(dom);
+ dom->is_dead = true;
}
} else {
buffer_advance(&dom->buffer, len);
@@ -316,6 +329,7 @@
ret = select(max_fd + 1, &readfds, &writefds, 0, &tv);
if (tv.tv_sec == 1 && (++num_of_writes % 100) == 0) {
+#if 0
/* FIXME */
/* This is a nasty hack. xcs does not handle the
control channels filling up well at all. We'll
@@ -325,6 +339,7 @@
going away */
tv.tv_usec = 1000;
select(0, 0, 0, 0, &tv);
+#endif
}
enum_domains();
@@ -333,13 +348,15 @@
}
for (d = dom_head; d; d = d->next) {
- if (FD_ISSET(d->tty_fd, &readfds)) {
+ if (!d->is_dead && FD_ISSET(d->tty_fd, &readfds)) {
handle_tty_read(d);
}
- if (FD_ISSET(d->tty_fd, &writefds)) {
+ if (!d->is_dead && FD_ISSET(d->tty_fd, &writefds)) {
handle_tty_write(d);
}
}
+
+ remove_dead_domains(dom_head);
} while (ret > -1);
}
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Domain.ml
--- a/tools/debugger/pdb/Domain.ml Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/Domain.ml Tue Aug 16 18:09:07 2005
@@ -36,6 +36,7 @@
Printf.sprintf "{domain} domain: %d, vcpu: %d"
ctx.domain ctx.vcpu
+external read_register : context_t -> int -> int32 = "dom_read_register"
external read_registers : context_t -> registers = "dom_read_registers"
external write_register : context_t -> register -> int32 -> unit =
"dom_write_register"
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Domain.mli
--- a/tools/debugger/pdb/Domain.mli Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/Domain.mli Tue Aug 16 18:09:07 2005
@@ -22,6 +22,7 @@
val string_of_context : context_t -> string
+val read_register : context_t -> int -> int32
val read_registers : context_t -> registers
val write_register : context_t -> register -> int32 -> unit
val read_memory : context_t -> int32 -> int -> int list
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Makefile
--- a/tools/debugger/pdb/Makefile Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/Makefile Tue Aug 16 18:09:07 2005
@@ -33,7 +33,8 @@
LIBS += unix str
# bc = byte-code, dc = debug byte-code
-all : patches dc
+# patches = patch linux domU source code
+all : dc
SOURCES += pdb_caml_xc.c
SOURCES += pdb_caml_domain.c pdb_caml_process.c
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/PDB.ml
--- a/tools/debugger/pdb/PDB.ml Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/PDB.ml Tue Aug 16 18:09:07 2005
@@ -219,6 +219,17 @@
(***************************************************************************)
+let read_register ctx register = (* register is int32 because of sscanf *)
+ match ctx with
+ | Void -> 0l (* default for startup *)
+ | Domain d -> Domain.read_register d register
+ | Process p ->
+ begin
+ Process.read_register p register;
+ raise No_reply
+ end
+ | _ -> raise (Unimplemented "read registers")
+
let read_registers ctx =
match ctx with
| Void -> Intel.null_registers (* default for startup *)
@@ -278,14 +289,42 @@
let insert_memory_breakpoint ctx addr len =
match ctx with
| Domain d -> Domain.insert_memory_breakpoint d addr len
- | Process p -> Process.insert_memory_breakpoint p addr len
+ | Process p ->
+ begin
+ Process.insert_memory_breakpoint p addr len;
+ raise No_reply
+ end
| _ -> raise (Unimplemented "insert memory breakpoint")
let remove_memory_breakpoint ctx addr len =
match ctx with
| Domain d -> Domain.remove_memory_breakpoint d addr len
- | Process p -> Process.remove_memory_breakpoint p addr len
+ | Process p ->
+ begin
+ Process.remove_memory_breakpoint p addr len;
+ raise No_reply
+ end
| _ -> raise (Unimplemented "remove memory breakpoint")
+
+let insert_watchpoint ctx kind addr len =
+ match ctx with
+(* | Domain d -> Domain.insert_watchpoint d kind addr len TODO *)
+ | Process p ->
+ begin
+ Process.insert_watchpoint p kind addr len;
+ raise No_reply
+ end
+ | _ -> raise (Unimplemented "insert watchpoint")
+
+let remove_watchpoint ctx kind addr len =
+ match ctx with
+(* | Domain d -> Domain.remove_watchpoint d kind addr len TODO *)
+ | Process p ->
+ begin
+ Process.remove_watchpoint p kind addr len;
+ raise No_reply
+ end
+ | _ -> raise (Unimplemented "remove watchpoint")
let pause ctx =
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Process.ml
--- a/tools/debugger/pdb/Process.ml Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/Process.ml Tue Aug 16 18:09:07 2005
@@ -54,6 +54,7 @@
proc_ctx.ring <- Xen_domain.get_ring dom_ctx;
_attach_debugger proc_ctx
+external read_register : context_t -> int -> unit = "proc_read_register"
external read_registers : context_t -> unit = "proc_read_registers"
external write_register : context_t -> register -> int32 -> unit =
"proc_write_register"
@@ -69,6 +70,10 @@
"proc_insert_memory_breakpoint"
external remove_memory_breakpoint : context_t -> int32 -> int -> unit =
"proc_remove_memory_breakpoint"
+external insert_watchpoint : context_t -> int -> int32 -> int -> unit =
+ "proc_insert_watchpoint"
+external remove_watchpoint : context_t -> int -> int32 -> int -> unit =
+ "proc_remove_watchpoint"
let pause ctx =
pause_target ctx
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Process.mli
--- a/tools/debugger/pdb/Process.mli Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/Process.mli Tue Aug 16 18:09:07 2005
@@ -26,7 +26,7 @@
val detach_debugger : context_t -> unit
val pause : context_t -> unit
-
+val read_register : context_t -> int -> unit
val read_registers : context_t -> unit
val write_register : context_t -> register -> int32 -> unit
val read_memory : context_t -> int32 -> int -> unit
@@ -37,3 +37,5 @@
val insert_memory_breakpoint : context_t -> int32 -> int -> unit
val remove_memory_breakpoint : context_t -> int32 -> int -> unit
+val insert_watchpoint : context_t -> int -> int32 -> int -> unit
+val remove_watchpoint : context_t -> int -> int32 -> int -> unit
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/debugger.ml
--- a/tools/debugger/pdb/debugger.ml Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/debugger.ml Tue Aug 16 18:09:07 2005
@@ -53,10 +53,20 @@
PDB.step ctx;
raise No_reply
+(**
+ Read Register Command.
+ returns the register as a 4-byte value.
+ *)
+let gdb_read_register ctx command =
+ let read_reg register =
+ (Printf.sprintf "%08lx" (Util.flip_int32 (PDB.read_register ctx register)))
+ in
+ Scanf.sscanf command "p%x" read_reg
+
(**
Read Registers Command.
- returns 16 4-byte registers in a particular defined by gdb.
+ returns 16 4-byte registers in a particular format defined by gdb.
*)
let gdb_read_registers ctx =
let regs = PDB.read_registers ctx in
@@ -100,7 +110,7 @@
with
Failure s -> "E02"
in
- Scanf.sscanf command "m%lx,%d" read_mem
+ Scanf.sscanf command "m%lx,%x" read_mem
@@ -218,16 +228,24 @@
(**
Insert Breakpoint or Watchpoint Packet
*)
+
+let bwc_watch_write = 102 (* from pdb_module.h *)
+let bwc_watch_read = 103
+let bwc_watch_access = 104
+
let gdb_insert_bwcpoint ctx command =
let insert cmd addr length =
try
match cmd with
| 0 -> PDB.insert_memory_breakpoint ctx addr length; "OK"
+ | 2 -> PDB.insert_watchpoint ctx bwc_watch_write addr length; "OK"
+ | 3 -> PDB.insert_watchpoint ctx bwc_watch_read addr length; "OK"
+ | 4 -> PDB.insert_watchpoint ctx bwc_watch_access addr length; "OK"
| _ -> ""
with
Failure s -> "E03"
in
- Scanf.sscanf command "Z%d,%lx,%d" insert
+ Scanf.sscanf command "Z%d,%lx,%x" insert
(**
Remove Breakpoint or Watchpoint Packet
@@ -237,6 +255,9 @@
try
match cmd with
| 0 -> PDB.remove_memory_breakpoint ctx addr length; "OK"
+ | 2 -> PDB.remove_watchpoint ctx bwc_watch_write addr length; "OK"
+ | 3 -> PDB.remove_watchpoint ctx bwc_watch_read addr length; "OK"
+ | 4 -> PDB.remove_watchpoint ctx bwc_watch_access addr length; "OK"
| _ -> ""
with
Failure s -> "E04"
@@ -260,6 +281,7 @@
| 'k' -> gdb_kill ()
| 'm' -> gdb_read_memory ctx command
| 'M' -> gdb_write_memory ctx command
+ | 'p' -> gdb_read_register ctx command
| 'P' -> gdb_write_register ctx command
| 'q' -> gdb_query command
| 's' -> gdb_step ctx
@@ -270,7 +292,7 @@
| 'Z' -> gdb_insert_bwcpoint ctx command
| _ ->
print_endline (Printf.sprintf "unknown gdb command [%s]" command);
- "E02"
+ ""
with
Unimplemented s ->
print_endline (Printf.sprintf "loser. unimplemented command [%s][%s]"
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-module/debug.c
--- a/tools/debugger/pdb/linux-2.6-module/debug.c Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/linux-2.6-module/debug.c Tue Aug 16 18:09:07 2005
@@ -9,33 +9,143 @@
#include <asm-i386/kdebug.h>
#include <asm-xen/asm-i386/processor.h>
#include <asm-xen/asm-i386/ptrace.h>
+#include <asm-xen/asm-i386/tlbflush.h>
#include <asm-xen/xen-public/xen.h>
#include "pdb_module.h"
#include "pdb_debug.h"
-#define BWC_DEBUG 1
-#define BWC_INT3 3
+
+static int pdb_debug_fn (struct pt_regs *regs, long error_code,
+ unsigned int condition);
+static int pdb_int3_fn (struct pt_regs *regs, long error_code);
+static int pdb_page_fault_fn (struct pt_regs *regs, long error_code,
+ unsigned int condition);
+
+/***********************************************************************/
+
typedef struct bwcpoint /* break/watch/catch point */
{
struct list_head list;
memory_t address;
- u32 domain;
+ int length;
+
+ u8 type; /* BWC_??? */
+ u8 mode; /* for BWC_PAGE, the current protection mode */
u32 process;
- u8 old_value; /* old value for software bkpt */
- u8 type; /* BWC_??? */
+ u8 error; /* error occurred when enabling: don't disable. */
+
+ /* original values */
+ u8 orig_bkpt; /* single byte breakpoint */
+ pte_t orig_pte;
+
+ struct list_head watchpt_read_list; /* read watchpoints on this page */
+ struct list_head watchpt_write_list; /* write */
+ struct list_head watchpt_access_list; /* access */
+ struct list_head watchpt_disabled_list; /* disabled */
+
+ struct bwcpoint *parent; /* watchpoint: bwc_watch (the page) */
+ struct bwcpoint *watchpoint; /* bwc_watch_step: original watchpoint */
} bwcpoint_t, *bwcpoint_p;
-static bwcpoint_t bwcpoint_list;
+static struct list_head bwcpoint_list = LIST_HEAD_INIT(bwcpoint_list);
+
+#define _pdb_bwcpoint_alloc(_var) \
+{ \
+ if ( (_var = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL)) == NULL ) \
+ printk("error: unable to allocate memory %d\n", __LINE__); \
+ else { \
+ memset(_var, 0, sizeof(bwcpoint_t)); \
+ INIT_LIST_HEAD(&_var->watchpt_read_list); \
+ INIT_LIST_HEAD(&_var->watchpt_write_list); \
+ INIT_LIST_HEAD(&_var->watchpt_access_list); \
+ INIT_LIST_HEAD(&_var->watchpt_disabled_list); \
+ } \
+}
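+
+/* Function-style equivalent of the macro above, for clarity only;
+ * the code in this file uses the statement macro. (Illustrative
+ * sketch, not built.) */
+#if 0
+static bwcpoint_p
+pdb_bwcpoint_alloc (void)
+{
+    bwcpoint_p b = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL);
+    if ( b == NULL ) return NULL;
+    memset(b, 0, sizeof(bwcpoint_t));
+    INIT_LIST_HEAD(&b->watchpt_read_list);
+    INIT_LIST_HEAD(&b->watchpt_write_list);
+    INIT_LIST_HEAD(&b->watchpt_access_list);
+    INIT_LIST_HEAD(&b->watchpt_disabled_list);
+    return b;
+}
+#endif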
+
+/***********************************************************************/
+
+static void _pdb_bwc_print_list (struct list_head *, char *, int);
+
+static void
+_pdb_bwc_print (bwcpoint_p bwc, char *label, int level)
+{
+ printk("%s%03d 0x%08lx:0x%02x %c\n", label, bwc->type,
+ bwc->address, bwc->length, bwc->error ? 'e' : '-');
+
+ if ( !list_empty(&bwc->watchpt_read_list) )
+ _pdb_bwc_print_list(&bwc->watchpt_read_list, "r", level);
+ if ( !list_empty(&bwc->watchpt_write_list) )
+ _pdb_bwc_print_list(&bwc->watchpt_write_list, "w", level);
+ if ( !list_empty(&bwc->watchpt_access_list) )
+ _pdb_bwc_print_list(&bwc->watchpt_access_list, "a", level);
+ if ( !list_empty(&bwc->watchpt_disabled_list) )
+ _pdb_bwc_print_list(&bwc->watchpt_disabled_list, "d", level);
+}
+
+static void
+_pdb_bwc_print_list (struct list_head *bwc_list, char *label, int level)
+{
+ struct list_head *ptr;
+ int counter = 0;
+
+ list_for_each(ptr, bwc_list)
+ {
+ bwcpoint_p bwc = list_entry(ptr, bwcpoint_t, list);
+ printk(" %s[%02d]%s ", level > 0 ? " " : "", counter++,
+ level > 0 ? "" : " ");
+ _pdb_bwc_print(bwc, label, level+1);
+ }
+
+ if (counter == 0)
+ {
+ printk(" empty list\n");
+ }
+}
void
-pdb_initialize_bwcpoint (void)
-{
- memset((void *) &bwcpoint_list, 0, sizeof(bwcpoint_t));
- INIT_LIST_HEAD(&bwcpoint_list.list);
-
- return;
-}
-
+pdb_bwc_print_list (void)
+{
+ _pdb_bwc_print_list(&bwcpoint_list, " ", 0);
+}
+
+bwcpoint_p
+pdb_search_watchpoint (u32 process, memory_t address)
+{
+ bwcpoint_p bwc_watch = (bwcpoint_p) 0;
+ bwcpoint_p bwc_entry = (bwcpoint_p) 0;
+ struct list_head *ptr;
+
+ list_for_each(ptr, &bwcpoint_list) /* find bwc page entry */
+ {
+ bwc_watch = list_entry(ptr, bwcpoint_t, list);
+ if (bwc_watch->address == (address & PAGE_MASK)) break;
+ }
+
+ if ( ptr == &bwcpoint_list ) /* no entry for this page */
+ {
+ return (bwcpoint_p) 0;
+ }
+
+#define __pdb_search_watchpoint_list(__list) \
+ list_for_each(ptr, (__list)) \
+ { \
+ bwc_entry = list_entry(ptr, bwcpoint_t, list); \
+ if ( bwc_entry->process == process && \
+ bwc_entry->address <= address && \
+ bwc_entry->address + bwc_entry->length > address ) \
+ return bwc_entry; \
+ }
+
+ __pdb_search_watchpoint_list(&bwc_watch->watchpt_read_list);
+ __pdb_search_watchpoint_list(&bwc_watch->watchpt_write_list);
+ __pdb_search_watchpoint_list(&bwc_watch->watchpt_access_list);
+
+#undef __pdb_search_watchpoint_list
+
+ return (bwcpoint_p) 0;
+}
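+
+/* Worked example of the lookup above: with 4 KB pages, an access by
+ * process P at 0x0804a6b4 is matched against the bwc page entry whose
+ * address is 0x0804a6b4 & PAGE_MASK == 0x0804a000, and then against
+ * each per-page list until an entry satisfying
+ * address <= 0x0804a6b4 < address + length is found.  (Addresses are
+ * illustrative.) */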
+
+/*************************************************************/
int
pdb_suspend (struct task_struct *target)
@@ -134,6 +244,35 @@
*(unsigned long *) stack = value;
return;
+}
+
+int
+pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op)
+{
+ int rc = 0;
+
+ switch (op->reg)
+ {
+ case 0: op->value = _pdb_get_register(target, LINUX_EAX); break;
+ case 1: op->value = _pdb_get_register(target, LINUX_ECX); break;
+ case 2: op->value = _pdb_get_register(target, LINUX_EDX); break;
+ case 3: op->value = _pdb_get_register(target, LINUX_EBX); break;
+ case 4: op->value = _pdb_get_register(target, LINUX_ESP); break;
+ case 5: op->value = _pdb_get_register(target, LINUX_EBP); break;
+ case 6: op->value = _pdb_get_register(target, LINUX_ESI); break;
+ case 7: op->value = _pdb_get_register(target, LINUX_EDI); break;
+ case 8: op->value = _pdb_get_register(target, LINUX_EIP); break;
+ case 9: op->value = _pdb_get_register(target, LINUX_EFL); break;
+
+ case 10: op->value = _pdb_get_register(target, LINUX_CS); break;
+ case 11: op->value = _pdb_get_register(target, LINUX_SS); break;
+ case 12: op->value = _pdb_get_register(target, LINUX_DS); break;
+ case 13: op->value = _pdb_get_register(target, LINUX_ES); break;
+ case 14: op->value = _pdb_get_register(target, LINUX_FS); break;
+ case 15: op->value = _pdb_get_register(target, LINUX_GS); break;
+ }
+
+ return rc;
}
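+
+/* A table-driven equivalent of the switch above (sketch only, not
+ * built): the index order is gdb's i386 register numbering, which is
+ * what the frontend's 'p' packet handler sends in op->reg. */
+#if 0
+static u32
+_pdb_read_register_by_table (struct task_struct *target, u32 reg)
+{
+    static const int gdb_reg_map[16] = {
+        LINUX_EAX, LINUX_ECX, LINUX_EDX, LINUX_EBX,
+        LINUX_ESP, LINUX_EBP, LINUX_ESI, LINUX_EDI,
+        LINUX_EIP, LINUX_EFL, LINUX_CS,  LINUX_SS,
+        LINUX_DS,  LINUX_ES,  LINUX_FS,  LINUX_GS
+    };
+    return reg < 16 ? _pdb_get_register(target, gdb_reg_map[reg]) : 0;
+}
+#endif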
int
@@ -209,18 +348,14 @@
eflags |= X86_EFLAGS_TF;
_pdb_set_register(target, LINUX_EFL, eflags);
- bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL);
- if ( bkpt == NULL )
- {
- printk("error: unable to allocation memory\n");
- return -1;
- }
+ _pdb_bwcpoint_alloc(bkpt);
+ if ( bkpt == NULL ) return -1;
bkpt->process = target->pid;
bkpt->address = 0;
bkpt->type = BWC_DEBUG;
- list_add(&bkpt->list, &bwcpoint_list.list);
+ list_add_tail(&bkpt->list, &bwcpoint_list);
wake_up_process(target);
@@ -237,31 +372,27 @@
printk("insert breakpoint %d:%lx len: %d\n", target->pid, address, length);
- bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL);
- if ( bkpt == NULL )
- {
- printk("error: unable to allocation memory\n");
+ if ( length != 1 )
+ {
+ printk("error: breakpoint length should be 1\n");
return -1;
}
- if ( length != 1 )
- {
- printk("error: breakpoint length should be 1\n");
- kfree(bkpt);
- return -1;
- }
+ _pdb_bwcpoint_alloc(bkpt);
+ if ( bkpt == NULL ) return -1;
bkpt->process = target->pid;
bkpt->address = address;
bkpt->type = BWC_INT3;
- pdb_access_memory(target, address, &bkpt->old_value, 1, 0);
- pdb_access_memory(target, address, &breakpoint_opcode, 1, 1);
+ pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_READ);
+ pdb_access_memory(target, address, &breakpoint_opcode, 1, PDB_MEM_WRITE);
- list_add(&bkpt->list, &bwcpoint_list.list);
+ list_add_tail(&bkpt->list, &bwcpoint_list);
printk("breakpoint_set %d:%lx OLD: 0x%x\n",
- target->pid, address, bkpt->old_value);
+ target->pid, address, bkpt->orig_bkpt);
+ pdb_bwc_print_list();
return rc;
}
@@ -276,7 +407,7 @@
printk ("remove breakpoint %d:%lx\n", target->pid, address);
struct list_head *entry;
- list_for_each(entry, &bwcpoint_list.list)
+ list_for_each(entry, &bwcpoint_list)
{
bkpt = list_entry(entry, bwcpoint_t, list);
if ( target->pid == bkpt->process &&
@@ -285,17 +416,223 @@
break;
}
- if (bkpt == &bwcpoint_list || bkpt == NULL)
+ if (entry == &bwcpoint_list)
{
printk ("error: no breakpoint found\n");
return -1;
}
+ pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_WRITE);
+
list_del(&bkpt->list);
-
- pdb_access_memory(target, address, &bkpt->old_value, 1, 1);
-
kfree(bkpt);
+
+ pdb_bwc_print_list();
+
+ return rc;
+}
+
+#define PDB_PTE_UPDATE 1
+#define PDB_PTE_RESTORE 2
+
+int
+pdb_change_pte (struct task_struct *target, bwcpoint_p bwc, int mode)
+{
+ int rc = 0;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *ptep;
+
+ pgd = pgd_offset(target->mm, bwc->address);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return -1;
+
+ pud = pud_offset(pgd, bwc->address);
+ if (pud_none(*pud) || unlikely(pud_bad(*pud))) return -2;
+
+ pmd = pmd_offset(pud, bwc->address);
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) return -3;
+
+ ptep = pte_offset_map(pmd, bwc->address);
+ if (!ptep) return -4;
+
+ switch ( mode )
+ {
+ case PDB_PTE_UPDATE: /* added or removed a watchpoint. update pte. */
+ {
+ pte_t new_pte;
+
+ if ( pte_val(bwc->parent->orig_pte) == 0 ) /* new watchpoint page */
+ {
+ bwc->parent->orig_pte = *ptep;
+ }
+
+ new_pte = bwc->parent->orig_pte;
+
+ if ( !list_empty(&bwc->parent->watchpt_read_list) ||
+ !list_empty(&bwc->parent->watchpt_access_list) )
+ {
+ new_pte = pte_rdprotect(new_pte);
+ }
+
+ if ( !list_empty(&bwc->parent->watchpt_write_list) ||
+ !list_empty(&bwc->parent->watchpt_access_list) )
+ {
+ new_pte = pte_wrprotect(new_pte);
+ }
+
+ if ( pte_val(new_pte) != pte_val(*ptep) )
+ {
+ *ptep = new_pte;
+ flush_tlb_mm(target->mm);
+ }
+ break;
+ }
+ case PDB_PTE_RESTORE : /* suspend watchpoint by restoring original pte */
+ {
+ *ptep = bwc->parent->orig_pte;
+ flush_tlb_mm(target->mm);
+ break;
+ }
+ default :
+ {
+ printk("(linux) unknown mode %d %d\n", mode, __LINE__);
+ break;
+ }
+ }
+
+ pte_unmap(ptep); /* can I flush the TLB before pte_unmap? */
+
+ return rc;
+}
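+
+/* The protection rules above, restated: while any read or access
+ * watchpoints exist on the page its PTE is read-protected, and while
+ * any write or access watchpoints exist it is write-protected, so a
+ * watched access takes a page fault.  A rough user-space analogue
+ * (illustration only, not part of this module):
+ *
+ *     mprotect(page, PAGE_SIZE, PROT_NONE);              // arm
+ *     ... fault; single-step the faulting instruction ...
+ *     mprotect(page, PAGE_SIZE, PROT_READ | PROT_WRITE); // restore
+ */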
+
+int
+pdb_insert_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt)
+{
+ int rc = 0;
+
+ bwcpoint_p bwc_watch;
+ bwcpoint_p bwc_entry;
+ struct list_head *ptr;
+ unsigned long page = watchpt->address & PAGE_MASK;
+ struct list_head *watchpoint_list;
+
+ printk("insert watchpoint: %d %x %x\n",
+ watchpt->type, watchpt->address, watchpt->length);
+
+ list_for_each(ptr, &bwcpoint_list) /* find existing bwc page entry */
+ {
+ bwc_watch = list_entry(ptr, bwcpoint_t, list);
+
+ if (bwc_watch->address == page) goto got_bwc_watch;
+ }
+
+ _pdb_bwcpoint_alloc(bwc_watch); /* create new bwc:watch */
+ if ( bwc_watch == NULL ) return -1;
+
+ bwc_watch->type = BWC_WATCH;
+ bwc_watch->process = target->pid;
+ bwc_watch->address = page;
+
+ list_add_tail(&bwc_watch->list, &bwcpoint_list);
+
+ got_bwc_watch:
+
+ switch (watchpt->type)
+ {
+ case BWC_WATCH_READ:
+ watchpoint_list = &bwc_watch->watchpt_read_list; break;
+ case BWC_WATCH_WRITE:
+ watchpoint_list = &bwc_watch->watchpt_write_list; break;
+ case BWC_WATCH_ACCESS:
+ watchpoint_list = &bwc_watch->watchpt_access_list; break;
+ default:
+ printk("unknown type %d\n", watchpt->type); return -2;
+ }
+
+ _pdb_bwcpoint_alloc(bwc_entry); /* create new bwc:entry */
+ if ( bwc_entry == NULL ) return -1;
+
+ bwc_entry->process = target->pid;
+ bwc_entry->address = watchpt->address;
+ bwc_entry->length = watchpt->length;
+ bwc_entry->type = watchpt->type;
+ bwc_entry->parent = bwc_watch;
+
+ list_add_tail(&bwc_entry->list, watchpoint_list);
+ pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE);
+
+ pdb_bwc_print_list();
+
+ return rc;
+}
+
+int
+pdb_remove_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt)
+{
+ int rc = 0;
+ bwcpoint_p bwc_watch = (bwcpoint_p) NULL;
+ bwcpoint_p bwc_entry = (bwcpoint_p) NULL;
+ unsigned long page = watchpt->address & PAGE_MASK;
+ struct list_head *ptr;
+ struct list_head *watchpoint_list;
+
+ printk("remove watchpoint: %d %x %x\n",
+ watchpt->type, watchpt->address, watchpt->length);
+
+ list_for_each(ptr, &bwcpoint_list) /* find bwc page entry */
+ {
+ bwc_watch = list_entry(ptr, bwcpoint_t, list);
+ if (bwc_watch->address == page) break;
+ }
+
+ if ( ptr == &bwcpoint_list ) /* page entry not found */
+ {
+ printk("(linux) delete watchpoint: can't find bwc page 0x%08x\n",
+ watchpt->address);
+ return -1;
+ }
+
+ switch (watchpt->type)
+ {
+ case BWC_WATCH_READ:
+ watchpoint_list = &bwc_watch->watchpt_read_list; break;
+ case BWC_WATCH_WRITE:
+ watchpoint_list = &bwc_watch->watchpt_write_list; break;
+ case BWC_WATCH_ACCESS:
+ watchpoint_list = &bwc_watch->watchpt_access_list; break;
+ default:
+ printk("unknown type %d\n", watchpt->type); return -2;
+ }
+
+ list_for_each(ptr, watchpoint_list) /* find watchpoint */
+ {
+ bwc_entry = list_entry(ptr, bwcpoint_t, list);
+ if ( bwc_entry->address == watchpt->address &&
+ bwc_entry->length == watchpt->length ) break;
+ }
+
+ if ( ptr == watchpoint_list ) /* watchpoint not found */
+ {
+ printk("(linux) delete watchpoint: can't find watchpoint 0x%08x\n",
+ watchpt->address);
+ return -1;
+ }
+
+ list_del(&bwc_entry->list);
+ pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE);
+ kfree(bwc_entry);
+
+ if ( list_empty(&bwc_watch->watchpt_read_list) &&
+ list_empty(&bwc_watch->watchpt_write_list) &&
+ list_empty(&bwc_watch->watchpt_access_list) )
+ {
+ list_del(&bwc_watch->list);
+ kfree(bwc_watch);
+ }
+
+ pdb_bwc_print_list();
return rc;
}
@@ -312,16 +649,24 @@
switch (val)
{
case DIE_DEBUG:
- if (pdb_debug_fn(args->regs, args->trapnr, args->err))
+ if ( pdb_debug_fn(args->regs, args->trapnr, args->err) )
return NOTIFY_STOP;
break;
case DIE_TRAP:
- if (args->trapnr == 3 && pdb_int3_fn(args->regs, args->err))
+ if ( args->trapnr == 3 && pdb_int3_fn(args->regs, args->err) )
return NOTIFY_STOP;
break;
case DIE_INT3: /* without kprobes, we should never see DIE_INT3 */
+ if ( pdb_int3_fn(args->regs, args->err) )
+ return NOTIFY_STOP;
+ break;
+ case DIE_PAGE_FAULT:
+ if ( pdb_page_fault_fn(args->regs, args->trapnr, args->err) )
+ return NOTIFY_STOP;
+ break;
case DIE_GPF:
- case DIE_PAGE_FAULT:
+ printk("---------------GPF\n");
+ break;
default:
break;
}
@@ -330,70 +675,110 @@
}
-int
+static int
pdb_debug_fn (struct pt_regs *regs, long error_code,
unsigned int condition)
{
pdb_response_t resp;
bwcpoint_p bkpt = NULL;
-
struct list_head *entry;
- list_for_each(entry, &bwcpoint_list.list)
+
+ printk("pdb_debug_fn\n");
+
+ list_for_each(entry, &bwcpoint_list)
{
bkpt = list_entry(entry, bwcpoint_t, list);
if ( current->pid == bkpt->process &&
- bkpt->type == BWC_DEBUG )
+ (bkpt->type == BWC_DEBUG || /* single step */
+ bkpt->type == BWC_WATCH_STEP)) /* single step over watchpoint */
break;
}
- if (bkpt == &bwcpoint_list || bkpt == NULL)
+ if (entry == &bwcpoint_list)
{
printk("not my debug 0x%x 0x%lx\n", current->pid, regs->eip);
return 0;
}
- list_del(&bkpt->list);
-
pdb_suspend(current);
- printk("(pdb) debug pid: %d, eip: 0x%08lx\n", current->pid, regs->eip);
+ printk("(pdb) %s pid: %d, eip: 0x%08lx\n",
+ bkpt->type == BWC_DEBUG ? "debug" : "watch-step",
+ current->pid, regs->eip);
regs->eflags &= ~X86_EFLAGS_TF;
set_tsk_thread_flag(current, TIF_SINGLESTEP);
- resp.operation = PDB_OPCODE_STEP;
+ switch (bkpt->type)
+ {
+ case BWC_DEBUG:
+ resp.operation = PDB_OPCODE_STEP;
+ break;
+ case BWC_WATCH_STEP:
+ {
+ struct list_head *watchpoint_list;
+ bwcpoint_p watch_page = bkpt->watchpoint->parent;
+
+ switch (bkpt->watchpoint->type)
+ {
+ case BWC_WATCH_READ:
+ watchpoint_list = &watch_page->watchpt_read_list; break;
+ case BWC_WATCH_WRITE:
+ watchpoint_list = &watch_page->watchpt_write_list; break;
+ case BWC_WATCH_ACCESS:
+ watchpoint_list = &watch_page->watchpt_access_list; break;
+ default:
+ printk("unknown type %d\n", bkpt->watchpoint->type); return 0;
+ }
+
+ resp.operation = PDB_OPCODE_WATCHPOINT;
+ list_del_init(&bkpt->watchpoint->list);
+ list_add_tail(&bkpt->watchpoint->list, watchpoint_list);
+ pdb_change_pte(current, bkpt->watchpoint, PDB_PTE_UPDATE);
+ pdb_bwc_print_list();
+ break;
+ }
+ default:
+ printk("unknown breakpoint type %d %d\n", __LINE__, bkpt->type);
+ return 0;
+ }
+
resp.process = current->pid;
resp.status = PDB_RESPONSE_OKAY;
pdb_send_response(&resp);
+ list_del(&bkpt->list);
+ kfree(bkpt);
+
return 1;
}
-int
+static int
pdb_int3_fn (struct pt_regs *regs, long error_code)
{
pdb_response_t resp;
bwcpoint_p bkpt = NULL;
+ memory_t address = regs->eip - 1; /* int3 is one byte; eip points past it */
struct list_head *entry;
- list_for_each(entry, &bwcpoint_list.list)
+ list_for_each(entry, &bwcpoint_list)
{
bkpt = list_entry(entry, bwcpoint_t, list);
if ( current->pid == bkpt->process &&
- regs->eip == bkpt->address &&
+ address == bkpt->address &&
bkpt->type == BWC_INT3 )
break;
}
- if (bkpt == &bwcpoint_list || bkpt == NULL)
- {
- printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, regs->eip);
+ if (entry == &bwcpoint_list)
+ {
+ printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, address);
return 0;
}
- printk("(pdb) int3 pid: %d, eip: 0x%08lx\n", current->pid, regs->eip);
+ printk("(pdb) int3 pid: %d, eip: 0x%08lx\n", current->pid, address);
pdb_suspend(current);
@@ -405,6 +790,54 @@
return 1;
}
+
+static int
+pdb_page_fault_fn (struct pt_regs *regs, long error_code,
+ unsigned int condition)
+{
+ unsigned long cr2;
+ unsigned long cr3;
+ bwcpoint_p bwc;
+ bwcpoint_p watchpt;
+ bwcpoint_p bkpt;
+
+ __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
+ __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
+
+ bwc = pdb_search_watchpoint(current->pid, cr2);
+ if ( !bwc )
+ {
+ return 0; /* not mine */
+ }
+
+ printk("page_fault cr2:%08lx err:%lx eip:%08lx\n",
+ cr2, error_code, regs->eip);
+
+ /* disable the watchpoint */
+ watchpt = bwc->watchpoint;
+ list_del_init(&bwc->list);
+ list_add_tail(&bwc->list, &bwc->parent->watchpt_disabled_list);
+ pdb_change_pte(current, bwc, PDB_PTE_RESTORE);
+
+ /* single step the faulting instruction */
+ regs->eflags |= X86_EFLAGS_TF;
+
+ /* create a bwcpoint entry so we know what to do once we regain control */
+ _pdb_bwcpoint_alloc(bkpt);
+ if ( bkpt == NULL ) return -1;
+
+ bkpt->process = current->pid;
+ bkpt->address = 0;
+ bkpt->type = BWC_WATCH_STEP;
+ bkpt->watchpoint = bwc;
+
+ /* add to head so we see it first the next time we break */
+ list_add(&bkpt->list, &bwcpoint_list);
+
+ pdb_bwc_print_list();
+ return 1;
+}
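+
+/* Watchpoint round trip, as implemented by the two handlers above
+ * (summary only):
+ *
+ *   1. access to a watched page -> page fault (DIE_PAGE_FAULT)
+ *   2. pdb_page_fault_fn: move the entry to the disabled list,
+ *      restore the original PTE (PDB_PTE_RESTORE), set EFLAGS.TF and
+ *      queue a BWC_WATCH_STEP marker at the head of bwcpoint_list
+ *   3. the faulting instruction re-executes and completes
+ *   4. debug trap (DIE_DEBUG) -> pdb_debug_fn sees BWC_WATCH_STEP:
+ *      re-protect the page (PDB_PTE_UPDATE) and send
+ *      PDB_OPCODE_WATCHPOINT, which the frontend reports as "S05"
+ */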
+
/*
* Local variables:
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-module/module.c
--- a/tools/debugger/pdb/linux-2.6-module/module.c Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/linux-2.6-module/module.c Tue Aug 16 18:09:07 2005
@@ -98,6 +98,11 @@
printk("(linux) detach 0x%x\n", request->process);
resp.status = PDB_RESPONSE_OKAY;
break;
+ case PDB_OPCODE_RD_REG :
+ resp.u.rd_reg.reg = request->u.rd_reg.reg;
+ pdb_read_register(target, &resp.u.rd_reg);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
case PDB_OPCODE_RD_REGS :
pdb_read_registers(target, &resp.u.rd_regs);
resp.status = PDB_RESPONSE_OKAY;
@@ -108,14 +113,16 @@
break;
case PDB_OPCODE_RD_MEM :
pdb_access_memory(target, request->u.rd_mem.address,
- &resp.u.rd_mem.data, request->u.rd_mem.length, 0);
+ &resp.u.rd_mem.data, request->u.rd_mem.length,
+ PDB_MEM_READ);
resp.u.rd_mem.address = request->u.rd_mem.address;
resp.u.rd_mem.length = request->u.rd_mem.length;
resp.status = PDB_RESPONSE_OKAY;
break;
case PDB_OPCODE_WR_MEM :
pdb_access_memory(target, request->u.wr_mem.address,
- &request->u.wr_mem.data, request->u.wr_mem.length, 1);
+ &request->u.wr_mem.data, request->u.wr_mem.length,
+ PDB_MEM_WRITE);
resp.status = PDB_RESPONSE_OKAY;
break;
case PDB_OPCODE_CONTINUE :
@@ -135,6 +142,14 @@
case PDB_OPCODE_CLR_BKPT :
pdb_remove_memory_breakpoint(target, request->u.bkpt.address,
request->u.bkpt.length);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
+ case PDB_OPCODE_SET_WATCHPT :
+ pdb_insert_watchpoint(target, &request->u.watchpt);
+ resp.status = PDB_RESPONSE_OKAY;
+ break;
+ case PDB_OPCODE_CLR_WATCHPT :
+ pdb_remove_watchpoint(target, &request->u.watchpt);
resp.status = PDB_RESPONSE_OKAY;
break;
default:
@@ -248,8 +263,6 @@
pdb_sring_t *sring;
printk("----\npdb initialize %s %s\n", __DATE__, __TIME__);
-
- pdb_initialize_bwcpoint();
/*
if ( xen_start_info.flags & SIF_INITDOMAIN )
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-module/pdb_debug.h
--- a/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Tue Aug 16 18:09:07 2005
@@ -6,6 +6,7 @@
void pdb_initialize_bwcpoint (void);
int pdb_suspend (struct task_struct *target);
int pdb_resume (struct task_struct *target);
+int pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op);
int pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op);
int pdb_write_register (struct task_struct *target, pdb_op_wr_reg_p op);
int pdb_read_memory (struct task_struct *target, pdb_op_rd_mem_req_p req,
@@ -20,13 +21,13 @@
memory_t address, u32 length);
int pdb_remove_memory_breakpoint (struct task_struct *target,
memory_t address, u32 length);
+int pdb_insert_watchpoint (struct task_struct *target,
+ pdb_op_watchpt_p watchpt);
+int pdb_remove_watchpoint (struct task_struct *target,
+ pdb_op_watchpt_p watchpt);
int pdb_exceptions_notify (struct notifier_block *self, unsigned long val,
void *data);
-
-int pdb_debug_fn (struct pt_regs *regs, long error_code,
- unsigned int condition);
-int pdb_int3_fn (struct pt_regs *regs, long error_code);
/* module.c */
void pdb_send_response (pdb_response_t *response);
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-module/pdb_module.h
--- a/tools/debugger/pdb/linux-2.6-module/pdb_module.h Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/linux-2.6-module/pdb_module.h Tue Aug 16 18:09:07 2005
@@ -14,20 +14,27 @@
#define PDB_OPCODE_DETACH 3
-#define PDB_OPCODE_RD_REGS 4
+#define PDB_OPCODE_RD_REG 4
+typedef struct pdb_op_rd_reg
+{
+ u32 reg;
+ u32 value;
+} pdb_op_rd_reg_t, *pdb_op_rd_reg_p;
+
+#define PDB_OPCODE_RD_REGS 5
typedef struct pdb_op_rd_regs
{
u32 reg[GDB_REGISTER_FRAME_SIZE];
} pdb_op_rd_regs_t, *pdb_op_rd_regs_p;
-#define PDB_OPCODE_WR_REG 5
+#define PDB_OPCODE_WR_REG 6
typedef struct pdb_op_wr_reg
{
u32 reg;
u32 value;
} pdb_op_wr_reg_t, *pdb_op_wr_reg_p;
-#define PDB_OPCODE_RD_MEM 6
+#define PDB_OPCODE_RD_MEM 7
typedef struct pdb_op_rd_mem_req
{
u32 address;
@@ -41,7 +48,7 @@
u8 data[1024];
} pdb_op_rd_mem_resp_t, *pdb_op_rd_mem_resp_p;
-#define PDB_OPCODE_WR_MEM 7
+#define PDB_OPCODE_WR_MEM 8
typedef struct pdb_op_wr_mem
{
u32 address;
@@ -49,16 +56,33 @@
u8 data[1024]; /* arbitrary */
} pdb_op_wr_mem_t, *pdb_op_wr_mem_p;
-#define PDB_OPCODE_CONTINUE 8
-#define PDB_OPCODE_STEP 9
+#define PDB_OPCODE_CONTINUE 9
+#define PDB_OPCODE_STEP 10
-#define PDB_OPCODE_SET_BKPT 10
-#define PDB_OPCODE_CLR_BKPT 11
+#define PDB_OPCODE_SET_BKPT 11
+#define PDB_OPCODE_CLR_BKPT 12
typedef struct pdb_op_bkpt
{
u32 address;
u32 length;
} pdb_op_bkpt_t, *pdb_op_bkpt_p;
+
+#define PDB_OPCODE_SET_WATCHPT 13
+#define PDB_OPCODE_CLR_WATCHPT 14
+#define PDB_OPCODE_WATCHPOINT 15
+typedef struct pdb_op_watchpt
+{
+#define BWC_DEBUG 1
+#define BWC_INT3 3
+#define BWC_WATCH 100 /* pdb: watchpoint page */
+#define BWC_WATCH_STEP 101 /* pdb: watchpoint single step */
+#define BWC_WATCH_WRITE 102
+#define BWC_WATCH_READ 103
+#define BWC_WATCH_ACCESS 104
+ u32 type;
+ u32 address;
+ u32 length;
+} pdb_op_watchpt_t, *pdb_op_watchpt_p;
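+
+/* Shape of a watchpoint request as filled in by the frontend (see
+ * proc_insert_watchpoint in pdb_caml_process.c):
+ *
+ *     req.operation         = PDB_OPCODE_SET_WATCHPT;
+ *     req.process           = pid;
+ *     req.u.watchpt.type    = BWC_WATCH_WRITE;   // 102
+ *     req.u.watchpt.address = addr;
+ *     req.u.watchpt.length  = len;
+ *
+ * Note that inserting PDB_OPCODE_RD_REG at 4 renumbered every later
+ * opcode, so the pdb module and the frontend must be rebuilt
+ * together. */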
typedef struct
@@ -68,10 +92,12 @@
union
{
pdb_op_attach_t attach;
+ pdb_op_rd_reg_t rd_reg;
pdb_op_wr_reg_t wr_reg;
pdb_op_rd_mem_req_t rd_mem;
pdb_op_wr_mem_t wr_mem;
pdb_op_bkpt_t bkpt;
+ pdb_op_watchpt_t watchpt;
} u;
} pdb_request_t, *pdb_request_p;
@@ -87,6 +113,7 @@
s16 status; /* PDB_RESPONSE_??? */
union
{
+ pdb_op_rd_reg_t rd_reg;
pdb_op_rd_regs_t rd_regs;
pdb_op_rd_mem_resp_t rd_mem;
} u;
@@ -94,6 +121,11 @@
DEFINE_RING_TYPES(pdb, pdb_request_t, pdb_response_t);
+
+
+/* from access_process_vm */
+#define PDB_MEM_READ 0
+#define PDB_MEM_WRITE 1
#endif
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch
--- a/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch Tue Aug 16 18:09:07 2005
@@ -1,7 +1,15 @@
diff -u linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c
--- linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c 2005-07-31 22:36:50.000000000 +0100
+++ linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c 2005-08-01 10:57:31.000000000 +0100
-@@ -172,6 +172,7 @@
+@@ -151,6 +151,7 @@
+ /* TLB flushing */
+ EXPORT_SYMBOL(flush_tlb_page);
+ #endif
++EXPORT_SYMBOL(flush_tlb_mm);
+
+ #ifdef CONFIG_X86_IO_APIC
+ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+@@ -172,6 +173,7 @@
EXPORT_SYMBOL_GPL(unset_nmi_callback);
EXPORT_SYMBOL(register_die_notifier);
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/pdb_caml_domain.c
--- a/tools/debugger/pdb/pdb_caml_domain.c Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/pdb_caml_domain.c Tue Aug 16 18:09:07 2005
@@ -41,6 +41,54 @@
/****************************************************************************/
+
+/*
+ * dom_read_register : context_t -> int -> int32
+ */
+value
+dom_read_register (value context, value reg)
+{
+ CAMLparam2(context, reg);
+ CAMLlocal1(result);
+
+ int my_reg = Int_val(reg);
+ cpu_user_regs_t *regs;
+ context_t ctx;
+
+ decode_context(&ctx, context);
+
+ if ( xendebug_read_registers(xc_handle, ctx.domain, ctx.vcpu, &regs) )
+ {
+ printf("(pdb) read registers error!\n"); fflush(stdout);
+ failwith("read registers error");
+ }
+
+ dump_regs(regs);
+
+ result = caml_copy_int32(0); /* default if reg is out of range */
+
+ switch (my_reg)
+ {
+ case GDB_EAX: result = caml_copy_int32(regs->eax); break;
+ case GDB_ECX: result = caml_copy_int32(regs->ecx); break;
+ case GDB_EDX: result = caml_copy_int32(regs->edx); break;
+ case GDB_EBX: result = caml_copy_int32(regs->ebx); break;
+ case GDB_ESP: result = caml_copy_int32(regs->esp); break;
+ case GDB_EBP: result = caml_copy_int32(regs->ebp); break;
+ case GDB_ESI: result = caml_copy_int32(regs->esi); break;
+ case GDB_EDI: result = caml_copy_int32(regs->edi); break;
+ case GDB_EIP: result = caml_copy_int32(regs->eip); break;
+ case GDB_EFL: result = caml_copy_int32(regs->eflags); break;
+ case GDB_CS: result = caml_copy_int32(regs->cs); break;
+ case GDB_SS: result = caml_copy_int32(regs->ss); break;
+ case GDB_DS: result = caml_copy_int32(regs->ds); break;
+ case GDB_ES: result = caml_copy_int32(regs->es); break;
+ case GDB_FS: result = caml_copy_int32(regs->fs); break;
+ case GDB_GS: result = caml_copy_int32(regs->gs); break;
+ }
+
+ CAMLreturn(result);
+}
/*
* dom_read_registers : context_t -> int32
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/pdb_caml_process.c
--- a/tools/debugger/pdb/pdb_caml_process.c Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/pdb_caml_process.c Tue Aug 16 18:09:07 2005
@@ -113,6 +113,12 @@
case PDB_OPCODE_DETACH :
break;
+ case PDB_OPCODE_RD_REG :
+ {
+ sprintf(&msg[0], "%08x", _flip(resp->u.rd_reg.value));
+ break;
+ }
+
case PDB_OPCODE_RD_REGS :
{
int loop;
@@ -161,16 +167,22 @@
}
case PDB_OPCODE_SET_BKPT :
- {
- break;
- }
case PDB_OPCODE_CLR_BKPT :
- {
+ case PDB_OPCODE_SET_WATCHPT :
+ case PDB_OPCODE_CLR_WATCHPT :
+ {
+ break;
+ }
+
+ case PDB_OPCODE_WATCHPOINT :
+ {
+ sprintf(msg, "S05");
break;
}
default :
- printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE\n");
+ printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE %d\n",
+ resp->operation);
break;
}
@@ -258,6 +270,32 @@
CAMLreturn(Val_unit);
}
+
+
+/*
+ * proc_read_register : context_t -> int -> unit
+ */
+value
+proc_read_register (value context, value reg)
+{
+ CAMLparam2(context, reg);
+
+ pdb_request_t req;
+ context_t ctx;
+ int my_reg = Int_val(reg);
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_RD_REG;
+ req.process = ctx.process;
+ req.u.rd_reg.reg = my_reg;
+ req.u.rd_reg.value = 0;
+
+ send_request (ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
+}
+
/*
@@ -443,7 +481,7 @@
/*
- * proc_insert_memory_breakpoint : context_t -> int32 -> int list -> unit
+ * proc_insert_memory_breakpoint : context_t -> int32 -> int -> unit
*/
value
proc_insert_memory_breakpoint (value context, value address, value length)
@@ -466,7 +504,7 @@
}
/*
- * proc_remove_memory_breakpoint : context_t -> int32 -> int list -> unit
+ * proc_remove_memory_breakpoint : context_t -> int32 -> int -> unit
*/
value
proc_remove_memory_breakpoint (value context, value address, value length)
@@ -482,6 +520,54 @@
req.process = ctx.process;
req.u.bkpt.address = (memory_t) Int32_val(address);
req.u.bkpt.length = Int_val(length);
+
+ send_request(ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * proc_insert_watchpoint : context_t -> int -> int32 -> int -> unit
+ */
+value
+proc_insert_watchpoint (value context, value kind, value address, value length)
+{
+ CAMLparam4(context, kind, address, length);
+
+ context_t ctx;
+ pdb_request_t req;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_SET_WATCHPT;
+ req.process = ctx.process;
+ req.u.watchpt.type = Int_val(kind);
+ req.u.watchpt.address = (memory_t) Int32_val(address);
+ req.u.watchpt.length = Int_val(length);
+
+ send_request(ctx.ring, ctx.evtchn, &req);
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * proc_remove_watchpoint : context_t -> int -> int32 -> int -> unit
+ */
+value
+proc_remove_watchpoint (value context, value kind, value address, value length)
+{
+ CAMLparam4(context, kind, address, length);
+
+ context_t ctx;
+ pdb_request_t req;
+
+ decode_context(&ctx, context);
+
+ req.operation = PDB_OPCODE_CLR_WATCHPT;
+ req.process = ctx.process;
+ req.u.watchpt.type = Int_val(kind);
+ req.u.watchpt.address = (memory_t) Int32_val(address);
+ req.u.watchpt.length = Int_val(length);
send_request(ctx.ring, ctx.evtchn, &req);
diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/readme
--- a/tools/debugger/pdb/readme Tue Aug 16 04:15:23 2005
+++ b/tools/debugger/pdb/readme Tue Aug 16 18:09:07 2005
@@ -1,9 +1,9 @@
-PDB 0.3
+PDB 0.3.3
http://www.cl.cam.ac.uk/netos/pdb
Alex Ho
-June 2005
+August 2005
This is the latest incarnation of the pervasive debugger.
@@ -79,6 +79,11 @@
Process
PDB can also debug a process running in a Linux 2.6 domain.
+ You will need to patch the Linux 2.6 domain U tree to export some
+ additional symbols for the pdb module:
+
+ % make -C linux-2.6-patches
+
After running PDB in domain 0, insert the pdb module in dom u:
% insmod linux-2.6-module/pdb.ko
@@ -87,7 +92,14 @@
(gdb) maint packet x context = process <domid> <pid>
+ Read, write, and access watchpoints should also work for processes;
+ use the "rwatch", "watch" and "awatch" gdb commands respectively
+ (see the example below).
+
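+   For example, to stop when a 4-byte variable is written (the
+   address here is illustrative):
+
+   (gdb) watch *(int *)0x0804a6b4
+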
+ If you are having trouble with GDB 5.3 (i386-redhat-linux-gnu),
+ try GDB 6.3 (configured with --target=i386-linux-gnu).
+
+
To Do
-- watchpoints
+- watchpoints for domains
- support for SMP
diff -r e3d811cca4e1 -r 1ae656509f02 tools/examples/network-bridge
--- a/tools/examples/network-bridge Tue Aug 16 04:15:23 2005
+++ b/tools/examples/network-bridge Tue Aug 16 18:09:07 2005
@@ -188,12 +188,13 @@
fi
fi
ip link set ${netdev} name p${netdev}
- ip link set veth0 name eth0
+ ip link set veth0 name ${netdev}
ifconfig p${netdev} -arp down
ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff
ifconfig ${netdev} hw ether ${mac}
add_to_bridge ${bridge} vif0.0
add_to_bridge ${bridge} p${netdev}
+ ip link set ${bridge} up
ip link set vif0.0 up
ip link set p${netdev} up
if ! ifup ${netdev} ; then
diff -r e3d811cca4e1 -r 1ae656509f02 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Tue Aug 16 04:15:23 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py Tue Aug 16 18:09:07 2005
@@ -583,7 +583,7 @@
self.create_channel()
self.image.createImage()
self.exportToDB()
- if self.store_channel:
+ if self.store_channel and self.store_mfn >= 0:
self.db.introduceDomain(self.id,
self.store_mfn,
self.store_channel)
@@ -915,8 +915,7 @@
"""
self.configure_fields()
self.create_devices()
- if self.image.ostype != 'vmx':
- self.create_blkif()
+ self.create_blkif()
def create_blkif(self):
"""Create the block device interface (blkif) for the vm.
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c
--- /dev/null Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Tue Aug 16 18:09:07 2005
@@ -0,0 +1,653 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * This implementation is a fallback for platforms that do not support
+ * I/O TLBs (aka DMA address translation hardware).
+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@xxxxxxxxx>
+ * Copyright (C) 2000 Goutham Rao <goutham.rao@xxxxxxxxx>
+ * Copyright (C) 2000, 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Copyright (C) 2005 Keir Fraser <keir@xxxxxxxxxxxxx>
+ */
+
+#include <linux/cache.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <asm/io.h>
+#include <asm/pci.h>
+#include <asm/dma.h>
+
+#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))
+
+#define SG_ENT_PHYS_ADDRESS(sg) (page_to_phys((sg)->page) + (sg)->offset)
+
+/*
+ * Maximum allowable number of contiguous slabs to map,
+ * must be a power of 2.  What is the appropriate value?
+ * The complexity of {map,unmap}_single is linearly dependent on this value.
+ */
+#define IO_TLB_SEGSIZE 128
+
+/*
+ * log of the size of each IO TLB slab. The number of slabs is command line
+ * controllable.
+ */
+#define IO_TLB_SHIFT 11
+
+int swiotlb_force;
+
+/*
+ * Used to do a quick range check in swiotlb_unmap_single and
+ * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * API.
+ */
+static char *io_tlb_start, *io_tlb_end;
+
+/*
+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
+ * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
+ */
+static unsigned long io_tlb_nslabs;
+
+/*
+ * When the IOMMU overflows we return a fallback buffer. This sets the size.
+ */
+static unsigned long io_tlb_overflow = 32*1024;
+
+void *io_tlb_overflow_buffer;
+
+/*
+ * This is a free list describing the number of free entries available from
+ * each index
+ */
+static unsigned int *io_tlb_list;
+static unsigned int io_tlb_index;
+
+/*
+ * We need to save away the original address corresponding to a mapped entry
+ * for the sync operations.
+ */
+static struct phys_addr {
+ struct page *page;
+ unsigned int offset;
+} *io_tlb_orig_addr;
+
+/*
+ * Protect the above data structures in the map and unmap calls
+ */
+static DEFINE_SPINLOCK(io_tlb_lock);
+
+static int __init
+setup_io_tlb_npages(char *str)
+{
+ if (isdigit(*str)) {
+ io_tlb_nslabs = simple_strtoul(str, &str, 0) <<
+ (PAGE_SHIFT - IO_TLB_SHIFT);
+ /* avoid tail segment of size < IO_TLB_SEGSIZE */
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ }
+ if (*str == ',')
+ ++str;
+ /*
+ * NB. 'force' enables the swiotlb, but doesn't force its use for
+ * every DMA like it does on native Linux.
+ */
+ if (!strcmp(str, "force"))
+ swiotlb_force = 1;
+ return 1;
+}
+__setup("swiotlb=", setup_io_tlb_npages);
+/* make io_tlb_overflow tunable too? */
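+
+/* Example (illustrative): booting with "swiotlb=8192,force" requests
+ * 8192 pages of bounce space -- shifted by PAGE_SHIFT - IO_TLB_SHIFT
+ * into 16384 slabs of 2 KB, i.e. 32 MB -- and enables the swiotlb
+ * even where the heuristic in swiotlb_init() would leave it off. */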
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the PCI DMA API.
+ */
+void
+swiotlb_init_with_default_size (size_t default_size)
+{
+ unsigned long i;
+
+ if (!io_tlb_nslabs) {
+ io_tlb_nslabs = (default_size >> PAGE_SHIFT);
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ }
+
+ /*
+ * Get IO TLB memory from the low pages
+ */
+ io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs *
+ (1 << IO_TLB_SHIFT));
+ if (!io_tlb_start)
+ panic("Cannot allocate SWIOTLB buffer");
+
+ xen_create_contiguous_region(
+ (unsigned long)io_tlb_start,
+ get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT)));
+
+ io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
+
+ /*
+ * Allocate and initialize the free list array. This array is used
+ * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+ * between io_tlb_start and io_tlb_end.
+ */
+ io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+ for (i = 0; i < io_tlb_nslabs; i++)
+ io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_index = 0;
+ io_tlb_orig_addr = alloc_bootmem(
+ io_tlb_nslabs * sizeof(*io_tlb_orig_addr));
+
+ /*
+ * Get the overflow emergency buffer
+ */
+ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+ printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
+ virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end-1));
+}
+
+void
+swiotlb_init(void)
+{
+ /* The user can forcibly enable swiotlb. */
+ if (swiotlb_force)
+ swiotlb = 1;
+
+ /*
+ * Otherwise, enable for domain 0 if the machine has 'lots of memory',
+ * which we take to mean more than 2GB.
+ */
+ if (xen_start_info.flags & SIF_INITDOMAIN) {
+ dom0_op_t op;
+ op.cmd = DOM0_PHYSINFO;
+ if ((HYPERVISOR_dom0_op(&op) == 0) &&
+ (op.u.physinfo.total_pages > 0x7ffff))
+ swiotlb = 1;
+ }
+
+ if (swiotlb)
+ swiotlb_init_with_default_size(64 * (1<<20));
+}
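+
+/* The threshold above: 0x7ffff pages * 4 KB/page is just under 2 GB,
+ * so dom0 enables the swiotlb on hosts with more than ~2 GB of
+ * memory; the pool then reserved by swiotlb_init_with_default_size()
+ * is 64 * (1 << 20) bytes = 64 MB. */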
+
+static void
+__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir)
+{
+ if (PageHighMem(buffer.page)) {
+ size_t len, bytes;
+ char *dev, *host, *kmp;
+ len = size;
+ while (len != 0) {
+ if (((bytes = len) + buffer.offset) > PAGE_SIZE)
+ bytes = PAGE_SIZE - buffer.offset;
+ kmp = kmap_atomic(buffer.page, KM_SWIOTLB);
+ dev = dma_addr + size - len;
+ host = kmp + buffer.offset;
+ memcpy((dir == DMA_FROM_DEVICE) ? host : dev,
+ (dir == DMA_FROM_DEVICE) ? dev : host,
+ bytes);
+ kunmap_atomic(kmp, KM_SWIOTLB);
+ len -= bytes;
+ buffer.page++;
+ buffer.offset = 0;
+ }
+ } else {
+ char *host = (char *)phys_to_virt(
+ page_to_pseudophys(buffer.page)) + buffer.offset;
+ if (dir == DMA_FROM_DEVICE)
+ memcpy(host, dma_addr, size);
+ else if (dir == DMA_TO_DEVICE)
+ memcpy(dma_addr, host, size);
+ }
+}
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+static void *
+map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir)
+{
+ unsigned long flags;
+ char *dma_addr;
+ unsigned int nslots, stride, index, wrap;
+ int i;
+
+ /*
+ * For mappings greater than a page, we limit the stride (and
+ * hence alignment) to a page size.
+ */
+ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ if (size > PAGE_SIZE)
+ stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+ else
+ stride = 1;
+
+ BUG_ON(!nslots);
+
+ /*
+ * Find suitable number of IO TLB entries size that will fit this
+ * request and allocate a buffer from that IO TLB pool.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ {
+ wrap = index = ALIGN(io_tlb_index, stride);
+
+ if (index >= io_tlb_nslabs)
+ wrap = index = 0;
+
+ do {
+ /*
+ * If we find a slot that indicates we have 'nslots'
+ * number of contiguous buffers, we allocate the
+ * buffers from that slot and mark the entries as '0'
+ * indicating unavailable.
+ */
+ if (io_tlb_list[index] >= nslots) {
+ int count = 0;
+
+ for (i = index; i < (int)(index + nslots); i++)
+ io_tlb_list[i] = 0;
+ for (i = index - 1;
+ (OFFSET(i, IO_TLB_SEGSIZE) !=
+ IO_TLB_SEGSIZE -1) && io_tlb_list[i];
+ i--)
+ io_tlb_list[i] = ++count;
+ dma_addr = io_tlb_start +
+ (index << IO_TLB_SHIFT);
+
+ /*
+ * Update the indices to avoid searching in
+ * the next round.
+ */
+ io_tlb_index =
+ ((index + nslots) < io_tlb_nslabs
+ ? (index + nslots) : 0);
+
+ goto found;
+ }
+ index += stride;
+ if (index >= io_tlb_nslabs)
+ index = 0;
+ } while (index != wrap);
+
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+ return NULL;
+ }
+ found:
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+ /*
+ * Save away the mapping from the original address to the DMA address.
+ * This is needed when we sync the memory. Then we sync the buffer if
+ * needed.
+ */
+ io_tlb_orig_addr[index] = buffer;
+ if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL))
+ __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
+
+ return dma_addr;
+}
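+
+/* Worked example of the slot arithmetic above: with IO_TLB_SHIFT = 11
+ * one slab is 2 KB, so a 5000-byte mapping needs
+ * nslots = ALIGN(5000, 2048) >> 11 = 3 slabs, and because 5000 >
+ * PAGE_SIZE the search stride is 1 << (PAGE_SHIFT - IO_TLB_SHIFT) = 2,
+ * which keeps large mappings page-aligned within the pool. */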
+
+/*
+ * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ */
+static void
+unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+{
+ unsigned long flags;
+ int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ struct phys_addr buffer = io_tlb_orig_addr[index];
+
+ /*
+ * First, sync the memory before unmapping the entry
+ */
+ if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
+ __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
+
+ /*
+ * Return the buffer to the free list by setting the corresponding
+ * entries to indicate the number of contiguous entries available.
+ * While returning the entries to the free list, we merge the entries
+ * with slots below and above the pool being returned.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ {
+ count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
+ io_tlb_list[index + nslots] : 0);
+ /*
+ * Step 1: return the slots to the free list, merging the
+ * slots with succeeding slots
+ */
+ for (i = index + nslots - 1; i >= index; i--)
+ io_tlb_list[i] = ++count;
+ /*
+ * Step 2: merge the returned slots with the preceding slots,
+ * if available (non-zero)
+ */
+ for (i = index - 1;
+ (OFFSET(i, IO_TLB_SEGSIZE) !=
+ IO_TLB_SEGSIZE -1) && io_tlb_list[i];
+ i--)
+ io_tlb_list[i] = ++count;
+ }
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+}
+
+static void
+sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+{
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ struct phys_addr buffer = io_tlb_orig_addr[index];
+ BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE));
+ __sync_single(buffer, dma_addr, size, dir);
+}
+
+static void
+swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+{
+ /*
+ * Ran out of IOMMU space for this operation. This is very bad.
+ * Unfortunately the drivers cannot handle this operation properly
+ * unless they check for pci_dma_mapping_error (most don't).
+ * When the mapping is small enough, return a static buffer to limit
+ * the damage, or panic when the transfer is too big.
+ */
+ printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
+ "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?");
+
+ if (size > io_tlb_overflow && do_panic) {
+ if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+ panic("PCI-DMA: Memory would be corrupted\n");
+ if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+ panic("PCI-DMA: Random memory would be DMAed\n");
+ }
+}
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode. The
+ * PCI address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory until
+ * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ */
+dma_addr_t
+swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
+{
+ dma_addr_t dev_addr = virt_to_bus(ptr);
+ void *map;
+ struct phys_addr buffer;
+
+ BUG_ON(dir == DMA_NONE);
+
+ /*
+ * If the pointer passed in happens to be in the device's DMA window,
+ * we can safely return the device addr and not worry about bounce
+ * buffering it.
+ */
+ if (!range_straddles_page_boundary(ptr, size) &&
+ !address_needs_mapping(hwdev, dev_addr))
+ return dev_addr;
+
+ /*
+ * Oh well, have to allocate and map a bounce buffer.
+ */
+ buffer.page = virt_to_page(ptr);
+ buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
+ map = map_single(hwdev, buffer, size, dir);
+ if (!map) {
+ swiotlb_full(hwdev, size, dir, 1);
+ map = io_tlb_overflow_buffer;
+ }
+
+ dev_addr = virt_to_bus(map);
+
+ /*
+ * Ensure that the address returned is DMA'ble
+ */
+ if (address_needs_mapping(hwdev, dev_addr))
+ panic("map_single: bounce buffer is not DMA'ble");
+
+ return dev_addr;
+}
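+
+/* Typical driver-side use (sketch; buffer and device names are
+ * illustrative):
+ *
+ *     dma_addr_t h = swiotlb_map_single(dev, buf, len, DMA_TO_DEVICE);
+ *     if (swiotlb_dma_mapping_error(h))
+ *         return -ENOMEM;     // mapping fell back to overflow buffer
+ *     ... device performs DMA ...
+ *     swiotlb_unmap_single(dev, h, len, DMA_TO_DEVICE);
+ */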
+
+/*
+ * Unmap a single streaming mode DMA translation. The dma_addr and size must
+ * match what was provided for in a previous swiotlb_map_single call. All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+void
+swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
+ int dir)
+{
+ char *dma_addr = bus_to_virt(dev_addr);
+
+ BUG_ON(dir == DMA_NONE);
+ if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+ unmap_single(hwdev, dma_addr, size, dir);
+}
+
+/*
+ * Make physical memory consistent for a single streaming mode DMA translation
+ * after a transfer.
+ *
+ * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * using the cpu, yet do not wish to tear down the PCI dma mapping, you must
+ * call this function before doing so. At the next point you give the PCI dma
+ * address back to the card, you must first perform a
+ * swiotlb_dma_sync_for_device, and then the device again owns the buffer.
+ */
+void
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, int dir)
+{
+ char *dma_addr = bus_to_virt(dev_addr);
+
+ BUG_ON(dir == DMA_NONE);
+ if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+ sync_single(hwdev, dma_addr, size, dir);
+}
+
+void
+swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, int dir)
+{
+ char *dma_addr = bus_to_virt(dev_addr);
+
+ BUG_ON(dir == DMA_NONE);
+ if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+ sync_single(hwdev, dma_addr, size, dir);
+}
+
+/*
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the above swiotlb_map_single
+ * interface. Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length. They are obtained via
+ * sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
+ *
+ * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * same here.
+ */
+int
+swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+ int dir)
+{
+ struct phys_addr buffer;
+ dma_addr_t dev_addr;
+ char *map;
+ int i;
+
+ BUG_ON(dir == DMA_NONE);
+
+ for (i = 0; i < nelems; i++, sg++) {
+ dev_addr = SG_ENT_PHYS_ADDRESS(sg);
+ if (address_needs_mapping(hwdev, dev_addr)) {
+ buffer.page = sg->page;
+ buffer.offset = sg->offset;
+ map = map_single(hwdev, buffer, sg->length, dir);
+ if (!map) {
+ /* Don't panic here, we expect map_sg users
+ to do proper error handling. */
+ swiotlb_full(hwdev, sg->length, dir, 0);
+ swiotlb_unmap_sg(hwdev, sg - i, i, dir);
+ sg[0].dma_length = 0;
+ return 0;
+ }
+ sg->dma_address = (dma_addr_t)virt_to_bus(map);
+ } else
+ sg->dma_address = dev_addr;
+ sg->dma_length = sg->length;
+ }
+ return nelems;
+}
+
+/*
+ * Unmap a set of streaming mode DMA translations. Again, cpu read rules
+ * concerning calls here are the same as for swiotlb_unmap_single() above.
+ */
+void
+swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+ int dir)
+{
+ int i;
+
+ BUG_ON(dir == DMA_NONE);
+
+ for (i = 0; i < nelems; i++, sg++)
+ if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+ unmap_single(hwdev,
+ (void *)bus_to_virt(sg->dma_address),
+ sg->dma_length, dir);
+}
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA translations
+ * after a transfer.
+ *
+ * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
+ * and usage.
+ */
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+ int nelems, int dir)
+{
+ int i;
+
+ BUG_ON(dir == DMA_NONE);
+
+ for (i = 0; i < nelems; i++, sg++)
+ if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+ sync_single(hwdev,
+ (void *)bus_to_virt(sg->dma_address),
+ sg->dma_length, dir);
+}
+
+void
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+ int nelems, int dir)
+{
+ int i;
+
+ BUG_ON(dir == DMA_NONE);
+
+ for (i = 0; i < nelems; i++, sg++)
+ if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+ sync_single(hwdev,
+ (void *)bus_to_virt(sg->dma_address),
+ sg->dma_length, dir);
+}
+
+dma_addr_t
+swiotlb_map_page(struct device *hwdev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ struct phys_addr buffer;
+ dma_addr_t dev_addr;
+ char *map;
+
+ dev_addr = page_to_phys(page) + offset;
+ if (address_needs_mapping(hwdev, dev_addr)) {
+ buffer.page = page;
+ buffer.offset = offset;
+ map = map_single(hwdev, buffer, size, direction);
+ if (!map) {
+ swiotlb_full(hwdev, size, direction, 1);
+ map = io_tlb_overflow_buffer;
+ }
+ dev_addr = (dma_addr_t)virt_to_bus(map);
+ }
+
+ return dev_addr;
+}
+
+void
+swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
+ size_t size, enum dma_data_direction direction)
+{
+ char *dma_addr = bus_to_virt(dma_address);
+
+ BUG_ON(direction == DMA_NONE);
+ if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+ unmap_single(hwdev, dma_addr, size, direction);
+}
+
+int
+swiotlb_dma_mapping_error(dma_addr_t dma_addr)
+{
+ return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
+}
+
+/*
+ * Return whether the given PCI device DMA address mask can be supported
+ * properly. For example, if your device can only drive the low 24-bits
+ * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
+ * this function.
+ */
+int
+swiotlb_dma_supported (struct device *hwdev, u64 mask)
+{
+ return (mask >= 0xffffffffUL);
+}
+
+EXPORT_SYMBOL(swiotlb_init);
+EXPORT_SYMBOL(swiotlb_map_single);
+EXPORT_SYMBOL(swiotlb_unmap_single);
+EXPORT_SYMBOL(swiotlb_map_sg);
+EXPORT_SYMBOL(swiotlb_unmap_sg);
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
+EXPORT_SYMBOL(swiotlb_map_page);
+EXPORT_SYMBOL(swiotlb_unmap_page);
+EXPORT_SYMBOL(swiotlb_dma_mapping_error);
+EXPORT_SYMBOL(swiotlb_dma_supported);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h
--- /dev/null Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h Tue Aug 16 18:09:07 2005
@@ -0,0 +1,32 @@
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+#include <linux/config.h>
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define D(n) __KM_FENCE_##n ,
+#else
+# define D(n)
+#endif
+
+enum km_type {
+D(0) KM_BOUNCE_READ,
+D(1) KM_SKB_SUNRPC_DATA,
+D(2) KM_SKB_DATA_SOFTIRQ,
+D(3) KM_USER0,
+D(4) KM_USER1,
+D(5) KM_BIO_SRC_IRQ,
+D(6) KM_BIO_DST_IRQ,
+D(7) KM_PTE0,
+D(8) KM_PTE1,
+D(9) KM_IRQ0,
+D(10) KM_IRQ1,
+D(11) KM_SOFTIRQ0,
+D(12) KM_SOFTIRQ1,
+D(13) KM_SWIOTLB,
+D(14) KM_TYPE_NR
+};
+
+#undef D
+
+#endif
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h
--- /dev/null Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h Tue Aug 16 18:09:07 2005
@@ -0,0 +1,22 @@
+#ifndef _I386_SCATTERLIST_H
+#define _I386_SCATTERLIST_H
+
+struct scatterlist {
+ struct page *page;
+ unsigned int offset;
+ unsigned int length;
+ dma_addr_t dma_address;
+ unsigned int dma_length;
+};
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg) ((sg)->dma_address)
+#define sg_dma_len(sg) ((sg)->dma_length)
+
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#endif /* !(_I386_SCATTERLIST_H) */
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h
--- /dev/null Tue Aug 16 04:15:23 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h Tue Aug 16 18:09:07 2005
@@ -0,0 +1,42 @@
+#ifndef _ASM_SWIOTLB_H
+#define _ASM_SWIOTLB_H 1
+
+#include <linux/config.h>
+
+/* SWIOTLB interface */
+
+extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size,
+ int dir);
+extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, int dir);
+extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
+ dma_addr_t dev_addr,
+ size_t size, int dir);
+extern void swiotlb_sync_single_for_device(struct device *hwdev,
+ dma_addr_t dev_addr,
+ size_t size, int dir);
+extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
+ struct scatterlist *sg, int nelems,
+ int dir);
+extern void swiotlb_sync_sg_for_device(struct device *hwdev,
+ struct scatterlist *sg, int nelems,
+ int dir);
+extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, int direction);
+extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, int direction);
+extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
+extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction);
+extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
+ size_t size, enum dma_data_direction direction);
+extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
+
+#ifdef CONFIG_SWIOTLB
+extern int swiotlb;
+#else
+#define swiotlb 0
+#endif
+
+#endif
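+
+/* With CONFIG_SWIOTLB disabled, "swiotlb" above is the constant 0, so
+ * a caller written as (sketch):
+ *
+ *     if (swiotlb)
+ *         dma = swiotlb_map_single(dev, ptr, size, dir);
+ *     else
+ *         dma = virt_to_bus(ptr);
+ *
+ * compiles down to the non-swiotlb branch with no runtime test. */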
diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c Tue Aug 16 04:15:23 2005
+++ /dev/null Tue Aug 16 18:09:07 2005
@@ -1,336 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm-xen/balloon.h>
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction)
-{
- int i;
-
- BUG_ON(direction == DMA_NONE);
- for (i = 0; i < nents; i++ ) {
- struct scatterlist *s = &sg[i];
- BUG_ON(!s->page);
- s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
- s->dma_length = s->length;
- }
- return nents;
-}
-
-EXPORT_SYMBOL(dma_map_sg);
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, int dir)
-{
- int i;
- for (i = 0; i < nents; i++) {
- struct scatterlist *s = &sg[i];
- BUG_ON(s->page == NULL);
- BUG_ON(s->dma_address == 0);
- dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
- }
-}
-
-EXPORT_SYMBOL(dma_unmap_sg);
-
-struct dma_coherent_mem {
- void *virt_base;
- u32 device_base;
- int size;
- int flags;
- unsigned long *bitmap;
-};
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, unsigned gfp)
-{
- void *ret;
- unsigned int order = get_order(size);
- unsigned long vstart;
-
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-
- /* ignore region specifiers */
- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
-
- if (mem) {
- int page = bitmap_find_free_region(mem->bitmap, mem->size,
- order);
- if (page >= 0) {
- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
- ret = mem->virt_base + (page << PAGE_SHIFT);
- memset(ret, 0, size);
- return ret;
- }
- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
- return NULL;
- }
-
- if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
- gfp |= GFP_DMA;
-
- vstart = __get_free_pages(gfp, order);
- ret = (void *)vstart;
- if (ret == NULL)
- return ret;
-
- xen_contig_memory(vstart, order);
-
- memset(ret, 0, size);
- *dma_handle = virt_to_bus(ret);
-
- return ret;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
- int order = get_order(size);
-
- if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
- bitmap_release_region(mem->bitmap, page, order);
- } else
- free_pages((unsigned long)vaddr, order);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-#if 0
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
- dma_addr_t device_addr, size_t size, int flags)
-{
- void __iomem *mem_base;
- int pages = size >> PAGE_SHIFT;
- int bitmap_size = (pages + 31)/32;
-
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
- goto out;
- if (!size)
- goto out;
- if (dev->dma_mem)
- goto out;
-
- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
- mem_base = ioremap(bus_addr, size);
- if (!mem_base)
- goto out;
-
- dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
- if (!dev->dma_mem)
- goto out;
- memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
- dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
- if (!dev->dma_mem->bitmap)
- goto free1_out;
- memset(dev->dma_mem->bitmap, 0, bitmap_size);
-
- dev->dma_mem->virt_base = mem_base;
- dev->dma_mem->device_base = device_addr;
- dev->dma_mem->size = pages;
- dev->dma_mem->flags = flags;
-
- if (flags & DMA_MEMORY_MAP)
- return DMA_MEMORY_MAP;
-
- return DMA_MEMORY_IO;
-
- free1_out:
- kfree(dev->dma_mem->bitmap);
- out:
- return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
-
- if(!mem)
- return;
- dev->dma_mem = NULL;
- iounmap(mem->virt_base);
- kfree(mem->bitmap);
- kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- int pos, err;
-
- if (!mem)
- return ERR_PTR(-EINVAL);
-
- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
- if (err != 0)
- return ERR_PTR(err);
- return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-#endif
-
-static LIST_HEAD(dma_map_head);
-static DEFINE_SPINLOCK(dma_map_lock);
-struct dma_map_entry {
- struct list_head list;
- dma_addr_t dma;
- char *bounce, *host;
- size_t size;
-};
-#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d)))
-
-dma_addr_t
-dma_map_single(struct device *dev, void *ptr, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_map_entry *ent;
- void *bnc;
- dma_addr_t dma;
- unsigned long flags;
-
- if (direction == DMA_NONE)
- out_of_line_bug();
-
- /*
- * Even if size is sub-page, the buffer may still straddle a page
- * boundary. Take into account buffer start offset. All other calls are
- * conservative and always search the dma_map list if it's non-empty.
- */
- if (((((unsigned long)ptr) & ~PAGE_MASK) + size) <= PAGE_SIZE) {
- dma = virt_to_bus(ptr);
- } else {
- BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, GFP_ATOMIC)) == NULL);
- BUG_ON((ent = kmalloc(sizeof(*ent), GFP_ATOMIC)) == NULL);
- if (direction != DMA_FROM_DEVICE)
- memcpy(bnc, ptr, size);
- ent->dma = dma;
- ent->bounce = bnc;
- ent->host = ptr;
- ent->size = size;
- spin_lock_irqsave(&dma_map_lock, flags);
- list_add(&ent->list, &dma_map_head);
- spin_unlock_irqrestore(&dma_map_lock, flags);
- }
-
- if ((dma+size) & ~*dev->dma_mask)
- out_of_line_bug();
- return dma;
-}
-EXPORT_SYMBOL(dma_map_single);
-
-void
-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_map_entry *ent;
- unsigned long flags;
-
- if (direction == DMA_NONE)
- out_of_line_bug();
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list ) {
- if (DMA_MAP_MATCHES(ent, dma_addr)) {
- list_del(&ent->list);
- break;
- }
- }
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- BUG_ON(dma_addr != ent->dma);
- BUG_ON(size != ent->size);
- if (direction != DMA_TO_DEVICE)
- memcpy(ent->host, ent->bounce, size);
- dma_free_coherent(dev, size, ent->bounce, ent->dma);
- kfree(ent);
- }
- }
-}
-EXPORT_SYMBOL(dma_unmap_single);
-
-void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_map_entry *ent;
- unsigned long flags, off;
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list )
- if (DMA_MAP_MATCHES(ent, dma_handle))
- break;
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- off = dma_handle - ent->dma;
- BUG_ON((off + size) > ent->size);
- /*if (direction != DMA_TO_DEVICE)*/
- memcpy(ent->host+off, ent->bounce+off, size);
- }
- }
-}
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
-
-void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_map_entry *ent;
- unsigned long flags, off;
-
- /* Fast-path check: are there any multi-page DMA mappings? */
- if (!list_empty(&dma_map_head)) {
- spin_lock_irqsave(&dma_map_lock, flags);
- list_for_each_entry ( ent, &dma_map_head, list )
- if (DMA_MAP_MATCHES(ent, dma_handle))
- break;
- spin_unlock_irqrestore(&dma_map_lock, flags);
- if (&ent->list != &dma_map_head) {
- off = dma_handle - ent->dma;
- BUG_ON((off + size) > ent->size);
- /*if (direction != DMA_FROM_DEVICE)*/
- memcpy(ent->bounce+off, ent->host+off, size);
- }
- }
-
- flush_write_buffers();
-}
-EXPORT_SYMBOL(dma_sync_single_for_device);
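
The dma_map_single() removed above bounced any buffer that could
straddle a page boundary, since pseudo-physically contiguous memory is
not guaranteed to be machine-contiguous under Xen; that duty now falls
to the swiotlb code enabled in the defconfigs. A condensed sketch of
the test it applied (simplified from the code above):

    /* Condensed from the removed dma_map_single(): a buffer may be
     * translated directly with virt_to_bus() only while it fits,
     * start offset included, inside a single page; otherwise it must
     * be copied through a machine-contiguous bounce buffer.
     */
    static int needs_bounce(void *ptr, size_t size)
    {
            unsigned long off = (unsigned long)ptr & ~PAGE_MASK;

            return (off + size) > PAGE_SIZE;
    }
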
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog