Hi Isaku,
Thank you very much for taking a look at this. Some comments &
questions below.
Just to make sure I understand this DMA tracking... swiotlb only
needs to track pages that are accessed by the I/O device directly.
The bounce buffer pool is presumably already pinned. This leaves only
the individual mappings that don't use that pool, correct? With a
hardware iommu the I/O device accesses unpinned memory whether it goes
through the iommu address space or not. So all mappings need to be
tracked. Correct?
On Tue, 2007-06-05 at 23:27 +0900, Isaku Yamahata wrote:
> # HG changeset patch
> # User yamahata@xxxxxxxxxxxxx
> # Date 1181035474 -32400
> # Node ID 83631cc2317d4f080fa611daea3aa479e047546f
> # Parent cb024947fa0186c5e08a71ecd4c0550967c5509a
> support dma tracking for sba_iommu.c
> PATCHNAME: dma_tracking_sba_iommu_c
>
> Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
>
> diff -r cb024947fa01 -r 83631cc2317d linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c
> --- a/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c Tue Jun 05 18:24:11 2007 +0900
> +++ b/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c Tue Jun 05 18:24:34 2007 +0900
> @@ -42,6 +42,9 @@
> #include <asm/system.h> /* wmb() */
>
> #include <asm/acpi-ext.h>
> +#ifdef CONFIG_XEN
> +#include <xen/gnttab.h>
> +#endif
>
> #define PFX "IOC: "
>
> @@ -901,7 +904,8 @@ sba_map_single(struct device *dev, void
> unsigned long flags;
> #endif
> #ifdef ALLOW_IOV_BYPASS
> - unsigned long pci_addr = virt_to_bus(addr);
> + unsigned long pci_addr = gnttab_dma_map_page(addr) +
> + offset_in_page(addr);
I might add an #ifdef CONFIG_XEN here, otherwise ok.
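Something along these lines is what I'm picturing (untested; it just
rearranges the hunk above so a non-Xen build keeps the original
virt_to_bus()):

#ifdef ALLOW_IOV_BYPASS
#ifdef CONFIG_XEN
	/* Xen: look up and track the machine address for this buffer. */
	unsigned long pci_addr = gnttab_dma_map_page(addr) +
				 offset_in_page(addr);
#else
	unsigned long pci_addr = virt_to_bus(addr);
#endif
#endif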
>
> ASSERT(to_pci_dev(dev)->dma_mask);
> /*
> @@ -994,6 +998,62 @@ sba_mark_clean(struct ioc *ioc, dma_addr
> }
> #endif
>
> +#ifdef CONFIG_XEN
> +static void
> +sba_gnttab_dma_unmap_page(struct ioc *ioc, dma_addr_t iova, size_t size)
> +{
> + u32 iovp = (u32) SBA_IOVP(ioc,iova);
> + int off = PDIR_INDEX(iovp);
> + void* vaddr;
> + struct page* page;
> + int i;
> + size_t step_size = max(PAGE_SIZE, iovp_size);
> +
> + if (!is_running_on_xen())
> + return;
> +
> + for (;;) {
> + size_t unmap_pages =
> + (min(step_size, size) + PAGE_SIZE - 1) / PAGE_SIZE;
> + vaddr = bus_to_virt(ioc->pdir_base[off] &
> + ~0xE000000000000FFFULL);
> + page = virt_to_page(vaddr);
> + for (i = 0; i < unmap_pages; i++) {
> + gnttab_dma_unmap_page(page_to_bus(page));
> + page++;
> + }
We must be able to optimize this. We're taking a machine physical
address out of the iotlb. We convert that to a guest virtual address
(+1 hypercall). We then convert that to a page, and convert that back
into a machine physical address (+1 hypercall). That's a terrible
amount of overhead, especially since gnttab_dma_unmap_page() currently
doesn't do anything. Why can't we use the machine physical address
directly from the iotlb and avoid both of these hypercalls?
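Roughly what I have in mind (untested; it assumes gnttab_dma_unmap_page()
can take the machine/bus address we already have in the pdir entry, which
is all the bus_to_virt()/virt_to_page()/page_to_bus() round trip produces
anyway):

#ifdef CONFIG_XEN
static void
sba_gnttab_dma_unmap_page(struct ioc *ioc, dma_addr_t iova, size_t size)
{
	u32 iovp = (u32) SBA_IOVP(ioc, iova);
	int off = PDIR_INDEX(iovp);
	size_t step_size = max(PAGE_SIZE, iovp_size);
	int i;

	if (!is_running_on_xen())
		return;

	for (;;) {
		size_t unmap_pages =
			(min(step_size, size) + PAGE_SIZE - 1) / PAGE_SIZE;
		/* The machine/bus address is already in the pdir entry. */
		dma_addr_t maddr = ioc->pdir_base[off] & ~0xE000000000000FFFULL;

		for (i = 0; i < unmap_pages; i++) {
			gnttab_dma_unmap_page(maddr);
			maddr += PAGE_SIZE;
		}

		if (size <= step_size)
			break;
		off += step_size / iovp_size;
		size -= step_size;
	}
}
#endif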
> +
> + if (size <= step_size)
> + break;
> + off += step_size / iovp_size;
> + size -= step_size;
> + }
> +}
> +
> +static void
> +sba_gnttab_dma_map_sg(struct scatterlist *sglist, int nents)
> +{
> + int i;
> +
> + if (!is_running_on_xen())
> + return;
> +
> + for (i = 0; i < nents; i++) {
> + struct page* page = sglist[i].page;
> + size_t size = sglist[i].length;
> +
> + for (;;) {
> + gnttab_dma_map_page(page);
> +
> + if (size <= PAGE_SIZE)
> + break;
> + page++;
> + size -= PAGE_SIZE;
I think the DMA API may only allow scatterlist entries up to a page in
size, so this inner loop may be redundant.
> + }
> + }
> +}
> +#endif
> +
> /**
> * sba_unmap_single - unmap one IOVA and free resources
> * @dev: instance of PCI owned by the driver that's asking.
> @@ -1027,6 +1087,9 @@ void sba_unmap_single(struct device *dev
> mark_clean(bus_to_virt(iova), size);
> }
> #endif
> +#ifdef CONFIG_XEN
> + gnttab_dma_unmap_page(iova);
The bypass path can map a multi-page buffer, so shouldn't we iterate
through all of its pages here?
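Something like this is what I'm thinking (untested; it assumes
gnttab_dma_unmap_page() is fine being handed a page-aligned bus address
for each page of the mapping):

#ifdef CONFIG_XEN
	{
		/* The bypass mapping may span several pages; untrack
		 * each of them, not just the first. */
		dma_addr_t bus = iova & PAGE_MASK;
		size_t npages = (offset_in_page(iova) + size + PAGE_SIZE - 1)
				>> PAGE_SHIFT;
		size_t i;

		for (i = 0; i < npages; i++) {
			gnttab_dma_unmap_page(bus);
			bus += PAGE_SIZE;
		}
	}
#endif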
> +#endif
> return;
> }
> #endif
> @@ -1043,7 +1106,10 @@ void sba_unmap_single(struct device *dev
> if (dir == DMA_FROM_DEVICE)
> sba_mark_clean(ioc, iova, size);
> #endif
> -
> +#ifdef CONFIG_XEN
> + sba_gnttab_dma_unmap_page(ioc, iova, size);
> +#endif
> +
> #if DELAYED_RESOURCE_CNT > 0
> spin_lock_irqsave(&ioc->saved_lock, flags);
> d = &(ioc->saved[ioc->saved_cnt]);
> @@ -1427,7 +1493,7 @@ int sba_map_sg(struct device *dev, struc
> if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
> for (sg = sglist ; filled < nents ; filled++, sg++){
> sg->dma_length = sg->length;
> - sg->dma_address = virt_to_bus(sba_sg_address(sg));
> + sg->dma_address = gnttab_dma_map_page(sg->address) + sg->offset;
I might use an #ifdef CONFIG_XEN here too, but no big deal.
> }
> return filled;
> }
> @@ -1460,6 +1526,10 @@ int sba_map_sg(struct device *dev, struc
> ** Access to the virtual address is what forces a two pass algorithm.
> */
> coalesced = sba_coalesce_chunks(ioc, sglist, nents);
> +
> +#ifdef CONFIG_XEN
> + sba_gnttab_dma_map_sg(sglist, nents);
> +#endif
This should probably move up before sba_coalesce_chunks() so that the
gnttab mapping isn't applied to the already-coalesced sglist, e.g.:
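i.e. something like this, just moving the call:

#ifdef CONFIG_XEN
	/* Track each entry while the scatterlist still describes the
	 * original buffers, before coalescing rewrites it. */
	sba_gnttab_dma_map_sg(sglist, nents);
#endif

	coalesced = sba_coalesce_chunks(ioc, sglist, nents);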
>
> /*
> ** Program the I/O Pdir
I think the above should probably work, but I'm concerned about the
overhead. We're adding one hypercall per scatterlist entry on the sg
mapping path (and we already have two virt-to-machine lookups on the map
single path... not optimal). If we had a gnttab interface that didn't
do its own lookup, we might be able to put this in sba_io_pdir_entry()
and avoid the extra hypercall; a rough sketch of what I mean is below.
Thanks again for looking at this.
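For illustration only, since no such call exists today: assuming a
hypothetical helper, say gnttab_dma_track_maddr(), that simply records a
machine address we already hold and does no lookup of its own, the
tracking could ride along with the pdir programming, roughly:

#ifdef CONFIG_XEN
/* Hypothetical interface: record an already-known machine address for
 * DMA tracking, without doing its own virt->machine lookup. */
extern void gnttab_dma_track_maddr(unsigned long maddr);
#endif

/* Illustrative sketch, not the real sba_io_pdir_entry(). */
static inline void
sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
{
	/* One translation, shared by the pdir entry and the tracking. */
	unsigned long maddr = virt_to_bus((void *) vba);

#ifdef CONFIG_XEN
	gnttab_dma_track_maddr(maddr);
#endif
	*pdir_ptr = (maddr & ~0xE000000000000FFFULL) | 0x8000000000000000ULL;
}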
Alex
--
Alex Williamson HP Open Source & Linux Org.
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel