[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 07/27] Xen/x86/PCI: Add support for the Xen PCI subsytem



From: Alex Nixon <alex.nixon@xxxxxxxxxx>

On boot, the system will search to see if a Xen iommu/pci subsystem is
available.  If the kernel detects it's running in a domain rather than
on bare hardware, this subsystem will be used.  Otherwise, it falls
back to using hardware as usual.

The frontend stub lives in arch/x86/pci-xen.c, alongside other
sub-arch PCI init code (e.g. olpc.c)

(All subsequent fixes, API changes and swiotlb operations folded in.)

Signed-off-by: Alex Nixon <alex.nixon@xxxxxxxxxx>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
---
 arch/x86/Kconfig                 |    4 +
 arch/x86/include/asm/pci_x86.h   |    1 +
 arch/x86/include/asm/xen/iommu.h |   12 ++
 arch/x86/kernel/pci-dma.c        |    3 +
 arch/x86/pci/Makefile            |    1 +
 arch/x86/pci/init.c              |    6 +
 arch/x86/pci/xen.c               |   52 +++++++
 drivers/pci/Makefile             |    2 +
 drivers/pci/xen-iommu.c          |  294 ++++++++++++++++++++++++++++++++++++++
 9 files changed, 375 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/include/asm/xen/iommu.h
 create mode 100644 arch/x86/pci/xen.c
 create mode 100644 drivers/pci/xen-iommu.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 15ec8a2..9092750 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1828,6 +1828,10 @@ config PCI_OLPC
        def_bool y
        depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY)
 
+config PCI_XEN
+       def_bool y
+       depends on XEN_PCI_PASSTHROUGH || XEN_DOM0_PCI
+
 config PCI_DOMAINS
        def_bool y
        depends on PCI
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 5401ca2..34f03a4 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -107,6 +107,7 @@ extern int pci_direct_probe(void);
 extern void pci_direct_init(int type);
 extern void pci_pcbios_init(void);
 extern int pci_olpc_init(void);
+extern int pci_xen_init(void);
 extern void __init dmi_check_pciprobe(void);
 extern void __init dmi_check_skip_isa_align(void);
 
diff --git a/arch/x86/include/asm/xen/iommu.h b/arch/x86/include/asm/xen/iommu.h
new file mode 100644
index 0000000..75df312
--- /dev/null
+++ b/arch/x86/include/asm/xen/iommu.h
@@ -0,0 +1,12 @@
+#ifndef ASM_X86__XEN_IOMMU_H
+
+#ifdef CONFIG_PCI_XEN
+extern void xen_iommu_init(void);
+#else
+static inline void xen_iommu_init(void)
+{
+}
+#endif
+
+#endif
+
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f293a8d..361fde2 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -9,6 +9,7 @@
 #include <asm/gart.h>
 #include <asm/calgary.h>
 #include <asm/amd_iommu.h>
+#include <asm/xen/iommu.h>
 
 static int forbid_dac __read_mostly;
 
@@ -265,6 +266,8 @@ EXPORT_SYMBOL(dma_supported);
 
 static int __init pci_iommu_init(void)
 {
+       xen_iommu_init();
+
        calgary_iommu_init();
 
        intel_iommu_init();
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index d49202e..64182c5 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS)          += pcbios.o
 obj-$(CONFIG_PCI_MMCONFIG)     += mmconfig_$(BITS).o direct.o mmconfig-shared.o
 obj-$(CONFIG_PCI_DIRECT)       += direct.o
 obj-$(CONFIG_PCI_OLPC)         += olpc.o
+obj-$(CONFIG_PCI_XEN)          += xen.o
 
 obj-y                          += fixup.o
 obj-$(CONFIG_ACPI)             += acpi.o
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index 25a1f8e..4e2f90a 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -15,10 +15,16 @@ static __init int pci_arch_init(void)
        if (!(pci_probe & PCI_PROBE_NOEARLY))
                pci_mmcfg_early_init();
 
+#ifdef CONFIG_PCI_XEN
+       if (!pci_xen_init())
+               return 0;
+#endif
+
 #ifdef CONFIG_PCI_OLPC
        if (!pci_olpc_init())
                return 0;       /* skip additional checks if it's an XO */
 #endif
+
 #ifdef CONFIG_PCI_BIOS
        pci_pcbios_init();
 #endif
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
new file mode 100644
index 0000000..76f803f
--- /dev/null
+++ b/arch/x86/pci/xen.c
@@ -0,0 +1,52 @@
+/*
+ * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux
+ *                        x86 PCI core to support the Xen PCI Frontend
+ *
+ *   Author: Ryan Wilson <hap9@xxxxxxxxxxxxxx>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/acpi.h>
+
+#include <asm/pci_x86.h>
+
+#include <asm/xen/hypervisor.h>
+
+static int xen_pcifront_enable_irq(struct pci_dev *dev)
+{
+       return 0;
+}
+
+extern int isapnp_disable;
+
+int __init pci_xen_init(void)
+{
+       if (!xen_pv_domain() || xen_initial_domain())
+               return -ENODEV;
+
+       printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
+
+       pcibios_set_cache_line_size();
+
+       pcibios_enable_irq = xen_pcifront_enable_irq;
+       pcibios_disable_irq = NULL;
+
+#ifdef CONFIG_ACPI
+       /* Keep ACPI out of the picture */
+       acpi_noirq = 1;
+#endif
+
+#ifdef CONFIG_ISAPNP
+       /* Stop isapnp from probing */
+       isapnp_disable = 1;
+#endif
+
+       /* Ensure a device still gets scanned even if it's fn number
+        * is non-zero.
+        */
+       pci_scan_all_fns = 1;
+
+       return 0;
+}
+
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 3d07ce2..106404e 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -27,6 +27,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
 # Build Intel IOMMU support
 obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
 
+# Build Xen IOMMU support
+obj-$(CONFIG_PCI_XEN) += xen-iommu.o
 obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
 
 #
diff --git a/drivers/pci/xen-iommu.c b/drivers/pci/xen-iommu.c
new file mode 100644
index 0000000..5b701e8
--- /dev/null
+++ b/drivers/pci/xen-iommu.c
@@ -0,0 +1,294 @@
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/scatterlist.h>
+#include <linux/io.h>
+#include <linux/bug.h>
+
+#include <xen/interface/xen.h>
+#include <xen/grant_table.h>
+#include <xen/page.h>
+#include <xen/xen-ops.h>
+
+#include <asm/iommu.h>
+#include <asm/swiotlb.h>
+#include <asm/tlbflush.h>
+
+#define IOMMU_BUG_ON(test)                             \
+do {                                                   \
+       if (unlikely(test)) {                           \
+               printk(KERN_ALERT "Fatal DMA error! "   \
+                      "Please use 'swiotlb=force'\n"); \
+               BUG();                                  \
+       }                                               \
+} while (0)
+
+/* Print address range with message */
+#define PAR(msg, addr, size)                                   \
+do {                                                   \
+       printk(msg "[%#llx - %#llx]\n",                 \
+       (unsigned long long)addr,                       \
+       (unsigned long long)addr + size);               \
+} while (0)
+
+struct dma_coherent_mem {
+       void            *virt_base;
+       u32             device_base;
+       int             size;
+       int             flags;
+       unsigned long   *bitmap;
+};
+
+static inline int address_needs_mapping(struct device *hwdev,
+                                               dma_addr_t addr)
+{
+       dma_addr_t mask = 0xffffffff;
+       int ret;
+
+       /* If the device has a mask, use it, otherwise default to 32 bits */
+       if (hwdev && hwdev->dma_mask)
+               mask = *hwdev->dma_mask;
+
+       ret = (addr & ~mask) != 0;
+
+       if (ret) {
+               printk(KERN_ERR "dma address needs mapping\n");
+               printk(KERN_ERR "mask: %#llx\n address: [%#llx]\n", mask, addr);
+       }
+       return ret;
+}
+
+static int check_pages_physically_contiguous(unsigned long pfn,
+                                            unsigned int offset,
+                                            size_t length)
+{
+       unsigned long next_mfn;
+       int i;
+       int nr_pages;
+
+       next_mfn = pfn_to_mfn(pfn);
+       nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+       for (i = 1; i < nr_pages; i++) {
+               if (pfn_to_mfn(++pfn) != ++next_mfn)
+                       return 0;
+       }
+       return 1;
+}
+
+static int range_straddles_page_boundary(phys_addr_t p, size_t size)
+{
+       unsigned long pfn = PFN_DOWN(p);
+       unsigned int offset = p & ~PAGE_MASK;
+
+       if (offset + size <= PAGE_SIZE)
+               return 0;
+       if (check_pages_physically_contiguous(pfn, offset, size))
+               return 0;
+       return 1;
+}
+
+static inline void xen_dma_unmap_page(struct page *page)
+{
+       /* Xen TODO: 2.6.18 xen calls __gnttab_dma_unmap_page here
+        * to deal with foreign pages.  We'll need similar logic here at
+        * some point.
+        */
+}
+
+/* Gets dma address of a page */
+static inline dma_addr_t xen_dma_map_page(struct page *page)
+{
+       /* Xen TODO: 2.6.18 xen calls __gnttab_dma_map_page here to deal
+        * with foreign pages.  We'll need similar logic here at some
+        * point.
+        */
+       return ((dma_addr_t)pfn_to_mfn(page_to_pfn(page))) << PAGE_SHIFT;
+}
+
+static int xen_map_sg(struct device *hwdev, struct scatterlist *sg,
+                     int nents,
+                     enum dma_data_direction direction,
+                     struct dma_attrs *attrs)
+{
+       struct scatterlist *s;
+       struct page *page;
+       int i, rc;
+
+       BUG_ON(direction == DMA_NONE);
+       WARN_ON(nents == 0 || sg[0].length == 0);
+
+       for_each_sg(sg, s, nents, i) {
+               BUG_ON(!sg_page(s));
+               page = sg_page(s);
+               s->dma_address = xen_dma_map_page(page) + s->offset;
+               s->dma_length = s->length;
+               IOMMU_BUG_ON(range_straddles_page_boundary(
+                               page_to_phys(page), s->length));
+       }
+
+       rc = nents;
+
+       flush_write_buffers();
+       return rc;
+}
+
+static void xen_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+                        int nents,
+                        enum dma_data_direction direction,
+                        struct dma_attrs *attrs)
+{
+       struct scatterlist *s;
+       struct page *page;
+       int i;
+
+       for_each_sg(sg, s, nents, i) {
+               page = pfn_to_page(mfn_to_pfn(PFN_DOWN(s->dma_address)));
+               xen_dma_unmap_page(page);
+       }
+}
+
+static void *xen_alloc_coherent(struct device *dev, size_t size,
+                               dma_addr_t *dma_handle, gfp_t gfp)
+{
+       void *ret;
+       struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+       unsigned int order = get_order(size);
+       unsigned long vstart;
+       u64 mask;
+
+       /* ignore region specifiers */
+       gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+       if (mem) {
+               int page = bitmap_find_free_region(mem->bitmap, mem->size,
+                                                    order);
+               if (page >= 0) {
+                       *dma_handle = mem->device_base + (page << PAGE_SHIFT);
+                       ret = mem->virt_base + (page << PAGE_SHIFT);
+                       memset(ret, 0, size);
+                       return ret;
+               }
+               if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+                       return NULL;
+       }
+
+       if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+               gfp |= GFP_DMA;
+
+       vstart = __get_free_pages(gfp, order);
+       ret = (void *)vstart;
+
+       if (dev != NULL && dev->coherent_dma_mask)
+               mask = dev->coherent_dma_mask;
+       else
+               mask = 0xffffffff;
+
+       if (ret != NULL) {
+               if (xen_create_contiguous_region(vstart, order,
+                                                fls64(mask)) != 0) {
+                       free_pages(vstart, order);
+                       return NULL;
+               }
+               memset(ret, 0, size);
+               *dma_handle = virt_to_machine(ret).maddr;
+       }
+       return ret;
+}
+
+static void xen_free_coherent(struct device *dev, size_t size,
+                        void *vaddr, dma_addr_t dma_addr)
+{
+       struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+       int order = get_order(size);
+
+       if (mem && vaddr >= mem->virt_base &&
+           vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+               int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+               bitmap_release_region(mem->bitmap, page, order);
+       } else {
+               xen_destroy_contiguous_region((unsigned long)vaddr, order);
+               free_pages((unsigned long)vaddr, order);
+       }
+}
+
+static dma_addr_t xen_map_page(struct device *dev, struct page *page,
+                              unsigned long offset, size_t size,
+                              enum dma_data_direction direction,
+                              struct dma_attrs *attrs)
+{
+       dma_addr_t dma;
+
+       BUG_ON(direction == DMA_NONE);
+
+       WARN_ON(size == 0);
+
+       dma = xen_dma_map_page(page) + offset;
+
+       IOMMU_BUG_ON(address_needs_mapping(dev, dma));
+       flush_write_buffers();
+       return dma;
+}
+
+static void xen_unmap_page(struct device *dev, dma_addr_t dma_addr,
+                          size_t size,
+                          enum dma_data_direction direction,
+                          struct dma_attrs *attrs)
+{
+       BUG_ON(direction == DMA_NONE);
+       xen_dma_unmap_page(pfn_to_page(mfn_to_pfn(PFN_DOWN(dma_addr))));
+}
+
+static struct dma_map_ops xen_dma_ops = {
+       .dma_supported = NULL,
+
+       .alloc_coherent = xen_alloc_coherent,
+       .free_coherent = xen_free_coherent,
+
+       .map_page = xen_map_page,
+       .unmap_page = xen_unmap_page,
+
+       .map_sg = xen_map_sg,
+       .unmap_sg = xen_unmap_sg,
+
+       .mapping_error = NULL,
+
+       .is_phys = 0,
+};
+
+static struct dma_map_ops xen_swiotlb_dma_ops = {
+       .dma_supported = swiotlb_dma_supported,
+
+       .alloc_coherent = xen_alloc_coherent,
+       .free_coherent = xen_free_coherent,
+
+       .map_page = swiotlb_map_page,
+       .unmap_page = swiotlb_unmap_page,
+
+       .map_sg = swiotlb_map_sg_attrs,
+       .unmap_sg = swiotlb_unmap_sg_attrs,
+
+       .mapping_error = swiotlb_dma_mapping_error,
+
+       .is_phys = 0,
+};
+
+void __init xen_iommu_init(void)
+{
+       if (!xen_pv_domain())
+               return;
+
+       printk(KERN_INFO "Xen: Initializing Xen DMA ops\n");
+
+       force_iommu = 0;
+       dma_ops = &xen_dma_ops;
+
+       if (swiotlb) {
+               printk(KERN_INFO "Xen: Enabling DMA fallback to swiotlb\n");
+               dma_ops = &xen_swiotlb_dma_ops;
+       }
+}
+
-- 
1.6.0.6


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.