WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 3/7] PCI device register/unregister + pci_dev cleanups

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 3/7] PCI device register/unregister + pci_dev cleanups
From: Espen Skoglund <espen.skoglund@xxxxxxxxxxxxx>
Date: Fri, 4 Jul 2008 17:36:46 +0100
Delivery-date: Fri, 04 Jul 2008 09:38:03 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <18542.20587.995970.962113@xxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <18542.20587.995970.962113@xxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Add management and locking of PCI device structures

Add functions for managing pci_dev structures.  Create a list
containing all current pci_devs.  Remove msi_pdev_list.  Create a
read-write lock protecting all pci_dev lists.  Add spinlocks for
pci_dev access.  Do necessary modifications to MSI code.

Signed-off-by: Espen Skoglund <espen.skoglund@xxxxxxxxxxxxx>


--
 b/xen/drivers/passthrough/pci.c             |  124 ++++++++++++++++++++
 xen/arch/x86/i8259.c                        |    3 
 xen/arch/x86/msi.c                          |  172 +++++++++++-----------------
 xen/arch/x86/physdev.c                      |    4 
 xen/drivers/passthrough/Makefile            |    1 
 xen/drivers/passthrough/amd/pci_amd_iommu.c |   72 ++++++-----
 xen/drivers/passthrough/iommu.c             |    5 
 xen/drivers/passthrough/vtd/iommu.c         |   73 ++++++-----
 xen/include/asm-x86/msi.h                   |    4 
 xen/include/xen/iommu.h                     |    9 -
 xen/include/xen/pci.h                       |   23 +++
 11 files changed, 304 insertions(+), 186 deletions(-)
--
diff -r 5b0699fb81a5 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/arch/x86/i8259.c      Fri Jul 04 17:19:39 2008 +0100
@@ -382,7 +382,6 @@
 
 static struct irqaction cascade = { no_action, "cascade", NULL};
 
-extern struct list_head msi_pdev_list;
 void __init init_IRQ(void)
 {
     int i;
@@ -419,7 +418,5 @@
     outb(LATCH >> 8, PIT_CH0);     /* MSB */
 
     setup_irq(2, &cascade);
-
-    INIT_LIST_HEAD(&msi_pdev_list);
 }
 
diff -r 5b0699fb81a5 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/arch/x86/msi.c        Fri Jul 04 17:19:39 2008 +0100
@@ -28,21 +28,6 @@
 #include <xen/iommu.h>
 
 extern int msi_irq_enable;
-
-/* PCI-dev list with MSI/MSIX capabilities */
-DEFINE_SPINLOCK(msi_pdev_lock);
-struct list_head msi_pdev_list;
-
-struct pci_dev *get_msi_pdev(u8 bus, u8 devfn)
-{
-    struct pci_dev *pdev = NULL;
-
-    list_for_each_entry(pdev, &msi_pdev_list, msi_dev_list)
-        if ( pdev->bus == bus && pdev->devfn == devfn )
-            return pdev;
-
-    return NULL;
-}
 
 /* bitmap indicate which fixed map is free */
 DEFINE_SPINLOCK(msix_fixmap_lock);
@@ -112,10 +97,8 @@
     }
 }
 
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
-    struct msi_desc *entry = irq_desc[irq].msi_desc;
-
     switch ( entry->msi_attrib.type )
     {
     case PCI_CAP_ID_MSI:
@@ -147,7 +130,7 @@
     {
         void __iomem *base;
         base = entry->mask_base +
-            entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+           entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
 
         msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
         msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -164,9 +147,6 @@
 
 static int set_vector_msi(struct msi_desc *entry)
 {
-    irq_desc_t *desc;
-    unsigned long flags;
-
     if ( entry->vector >= NR_VECTORS )
     {
         dprintk(XENLOG_ERR, "Trying to install msi data for Vector %d\n",
@@ -174,19 +154,12 @@
         return -EINVAL;
     }
 
-    desc = &irq_desc[entry->vector];
-    spin_lock_irqsave(&desc->lock, flags);
-    desc->msi_desc = entry;
-    spin_unlock_irqrestore(&desc->lock, flags);
-
+    irq_desc[entry->vector].msi_desc = entry;
     return 0;
 }
 
 static int unset_vector_msi(int vector)
 {
-    irq_desc_t *desc;
-    unsigned long flags;
-
     if ( vector >= NR_VECTORS )
     {
         dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
@@ -194,18 +167,12 @@
         return -EINVAL;
     }
 
-    desc = &irq_desc[vector];
-    spin_lock_irqsave(&desc->lock, flags);
-    desc->msi_desc = NULL;
-    spin_unlock_irqrestore(&desc->lock, flags);
-
+    irq_desc[vector].msi_desc = NULL;
     return 0;
 }
 
-void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
-    struct msi_desc *entry = irq_desc[irq].msi_desc;
-
     if ( vtd_enabled )
         msi_msg_write_remap_rte(entry, msg);
 
@@ -254,6 +221,7 @@
 
 void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+    struct msi_desc *desc = irq_desc[irq].msi_desc;
     struct msi_msg msg;
     unsigned int dest;
 
@@ -263,12 +231,18 @@
         mask = TARGET_CPUS;
     dest = cpu_mask_to_apicid(mask);
 
-    read_msi_msg(irq, &msg);
+    if ( !desc )
+       return;
+
+    ASSERT(spin_is_locked(&irq_desc[irq].lock));
+    spin_lock(&desc->dev->lock);
+    read_msi_msg(desc, &msg);
 
     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
     msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
-    write_msi_msg(irq, &msg);
+    write_msi_msg(desc, &msg);
+    spin_unlock(&desc->dev->lock);
 }
 
 static void msi_set_enable(struct pci_dev *dev, int enable)
@@ -290,7 +264,7 @@
     }
 }
 
-void msix_set_enable(struct pci_dev *dev, int enable)
+static void msix_set_enable(struct pci_dev *dev, int enable)
 {
     int pos;
     u16 control;
@@ -335,6 +309,7 @@
 {
     struct msi_desc *entry = irq_desc[irq].msi_desc;
 
+    ASSERT(spin_is_locked(&irq_desc[irq].lock));
     BUG_ON(!entry || !entry->dev);
     switch (entry->msi_attrib.type) {
     case PCI_CAP_ID_MSI:
@@ -401,7 +376,7 @@
 
     msi_compose_msg(dev, desc->vector, &msg);
     set_vector_msi(desc);
-    write_msi_msg(desc->vector, &msg);
+    write_msi_msg(irq_desc[desc->vector].msi_desc, &msg);
 
     return 0;
 }
@@ -415,8 +390,8 @@
 {
     struct msi_desc *entry;
 
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
     entry = irq_desc[vector].msi_desc;
-
     teardown_msi_vector(vector);
 
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
@@ -619,35 +594,22 @@
 static int __pci_enable_msi(u8 bus, u8 devfn, int vector)
 {
     int status;
-    struct pci_dev *dev;
+    struct pci_dev *pdev;
 
-    dev = get_msi_pdev(bus, devfn);
-    if ( !dev )
+    pdev = pci_lock_pdev(bus, devfn);
+    if ( !pdev )
+       return -ENODEV;
+
+    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) )
     {
-        dev = xmalloc(struct pci_dev);
-        if ( !dev )
-            return -ENOMEM;
-        dev->bus = bus;
-        dev->devfn = devfn;
-        INIT_LIST_HEAD(&dev->msi_list);
-    }
-
-    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSI) )
-    {
+       spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device 
\
             %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
 
-    status = msi_capability_init(dev, vector);
-
-    if ( dev != get_msi_pdev(bus, devfn) )
-    {
-        spin_lock(&msi_pdev_lock);
-        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
-        spin_unlock(&msi_pdev_lock);
-    }
-
+    status = msi_capability_init(pdev, vector);
+    spin_unlock(&pdev->lock);
     return status;
 }
 
@@ -660,6 +622,13 @@
     u8 bus, slot, func;
 
     entry = irq_desc[vector].msi_desc;
+    if ( !entry )
+       return;
+    /*
+     * Lock here is safe.  msi_desc can not be removed without holding
+     * both irq_desc[].lock (which we do) and pdev->lock.
+     */
+    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -674,6 +643,7 @@
     msi_free_vector(vector);
 
     pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
+    spin_unlock(&dev->lock);
 }
 
 /**
@@ -694,47 +664,35 @@
 static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr)
 {
     int status, pos, nr_entries;
-    struct pci_dev *dev;
+    struct pci_dev *pdev;
     u16 control;
     u8 slot = PCI_SLOT(devfn);
     u8 func = PCI_FUNC(devfn);
+
+    pdev = pci_lock_pdev(bus, devfn);
+    if ( !pdev )
+       return -ENODEV;
 
     pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
     control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
     nr_entries = multi_msix_capable(control);
     if (entry_nr > nr_entries)
+    {
+       spin_unlock(&pdev->lock);
         return -EINVAL;
-
-    /* Check whether driver already requested for MSI-X irqs */
-    dev = get_msi_pdev(bus, devfn);
-
-    if ( !dev )
-    {
-        dev = xmalloc(struct pci_dev);
-        if ( !dev )
-            return -ENOMEM;
-        dev->bus = bus;
-        dev->devfn = devfn;
-        INIT_LIST_HEAD(&dev->msi_list);
     }
 
-    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSIX) )
+    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) )
     {
+       spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \
                 device %02x:%02x.%01x.\n", vector, bus,
                 PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
 
-    status = msix_capability_init(dev, vector, entry_nr);
-
-    if ( dev != get_msi_pdev(bus, devfn) )
-    {
-        spin_lock(&msi_pdev_lock);
-        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
-        spin_unlock(&msi_pdev_lock);
-    }
-
+    status = msix_capability_init(pdev, vector, entry_nr);
+    spin_unlock(&pdev->lock);
     return status;
 }
 
@@ -747,6 +705,13 @@
     u8 bus, slot, func;
 
     entry = irq_desc[vector].msi_desc;
+    if ( !entry )
+       return;
+    /*
+     * Lock here is safe.  msi_desc can not be removed without holding
+     * both irq_desc[].lock (which we do) and pdev->lock.
+     */
+    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -761,10 +726,12 @@
     msi_free_vector(vector);
 
     pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
+    spin_unlock(&dev->lock);
 }
 
 int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi)
 {
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
     if ( msi )
         return __pci_enable_msi(bus, devfn, vector);
     else
@@ -773,9 +740,11 @@
 
 void pci_disable_msi(int vector)
 {
-    irq_desc_t *desc;
+    irq_desc_t *desc = &irq_desc[vector];
+    ASSERT(spin_is_locked(&desc->lock));
+    if ( !desc->msi_desc )
+       return;
 
-    desc = &irq_desc[vector];
     if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
         __pci_disable_msi(vector);
     else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
@@ -789,9 +758,17 @@
     irq_desc_t *desc;
     unsigned long flags;
 
+retry:
     list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
     {
         desc = &irq_desc[entry->vector];
+
+       local_irq_save(flags);
+       if ( !spin_trylock(&desc->lock) )
+       {
+           local_irq_restore(flags);
+           goto retry;
+       }
 
-        spin_lock_irqsave(&desc->lock, flags);
         if ( desc->handler == &pci_msi_type )
@@ -800,22 +777,17 @@
             BUG_ON(desc->status & IRQ_GUEST);
             desc->handler = &no_irq_type;
         }
-        spin_unlock_irqrestore(&desc->lock, flags);
 
         msi_free_vector(entry->vector);
+        spin_unlock_irqrestore(&desc->lock, flags);
     }
 }
 
-void pci_cleanup_msi(u8 bus, u8 devfn)
+void pci_cleanup_msi(struct pci_dev *pdev)
 {
-    struct pci_dev *dev = get_msi_pdev(bus, devfn);
-
-    if ( !dev )
-        return;
-
     /* Disable MSI and/or MSI-X */
-    msi_set_enable(dev, 0);
-    msix_set_enable(dev, 0);
-    msi_free_vectors(dev);
+    msi_set_enable(pdev, 0);
+    msix_set_enable(pdev, 0);
+    msi_free_vectors(pdev);
 }
 
diff -r 5b0699fb81a5 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/arch/x86/physdev.c    Fri Jul 04 17:19:39 2008 +0100
@@ -114,12 +114,12 @@
             gdprintk(XENLOG_G_ERR, "Map vector %x to msi while it is in use\n",
                      vector);
         desc->handler = &pci_msi_type;
-        spin_unlock_irqrestore(&desc->lock, flags);
 
         ret = pci_enable_msi(map->msi_info.bus,
                                     map->msi_info.devfn, vector,
                                                         map->msi_info.entry_nr,
                                                         map->msi_info.msi);
+        spin_unlock_irqrestore(&desc->lock, flags);
         if ( ret )
             goto done;
     }
@@ -161,10 +161,10 @@
         irq_desc_t *desc;
 
         desc = &irq_desc[vector];
+        spin_lock_irqsave(&desc->lock, flags);
         if ( desc->msi_desc )
             pci_disable_msi(vector);
 
-        spin_lock_irqsave(&desc->lock, flags);
         if ( desc->handler == &pci_msi_type )
         {
             /* MSI is not shared, so should be released already */
diff -r 5b0699fb81a5 xen/drivers/passthrough/Makefile
--- a/xen/drivers/passthrough/Makefile  Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/drivers/passthrough/Makefile  Fri Jul 04 17:19:39 2008 +0100
@@ -3,3 +3,4 @@
 
 obj-y += iommu.o
 obj-y += io.o
+obj-y += pci.o
diff -r 5b0699fb81a5 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Jul 04 17:04:47 
2008 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Jul 04 17:19:39 
2008 +0100
@@ -298,6 +298,7 @@
     u32 l;
     int bdf;
 
+    write_lock(&pcidevs_lock);
     for ( bus = 0; bus < 256; bus++ )
     {
         for ( dev = 0; dev < 32; dev++ )
@@ -310,10 +311,9 @@
                      (l == 0x0000ffff) || (l == 0xffff0000) )
                     continue;
 
-                pdev = xmalloc(struct pci_dev);
-                pdev->bus = bus;
-                pdev->devfn = PCI_DEVFN(dev, func);
-                list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
+                pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
+                pdev->domain = d;
+                list_add(&pdev->domain_list, &d->arch.pdev_list);
 
                 bdf = (bus << 8) | pdev->devfn;
                 /* supported device? */
@@ -325,6 +325,7 @@
             }
         }
     }
+    write_unlock(&pcidevs_lock);
 }
 
 int amd_iov_detect(void)
@@ -493,38 +494,37 @@
     struct amd_iommu *iommu;
     int bdf;
 
-    for_each_pdev ( source, pdev )
+    pdev = pci_lock_domain_pdev(source, bus, devfn);
+    if ( !pdev )
+       return -ENODEV;
+
+    bdf = (bus << 8) | devfn;
+    /* supported device? */
+    iommu = (bdf < ivrs_bdf_entries) ?
+       find_iommu_for_device(bus, pdev->devfn) : NULL;
+
+    if ( !iommu )
     {
-        if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
-            continue;
+       spin_unlock(&pdev->lock);
+       amd_iov_error("Fail to find iommu."
+                     " %x:%x.%x cannot be assigned to domain %d\n", 
+                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
+       return -ENODEV;
+    }
 
-        pdev->bus = bus;
-        pdev->devfn = devfn;
+    amd_iommu_disable_domain_device(source, iommu, bdf);
 
-        bdf = (bus << 8) | devfn;
-        /* supported device? */
-        iommu = (bdf < ivrs_bdf_entries) ?
-            find_iommu_for_device(bus, pdev->devfn) : NULL;
+    write_lock(&pcidevs_lock);
+    list_move(&pdev->domain_list, &target->arch.pdev_list);
+    write_unlock(&pcidevs_lock);
+    pdev->domain = target;
 
-        if ( !iommu )
-        {
-            amd_iov_error("Fail to find iommu."
-                     " %x:%x.%x cannot be assigned to domain %d\n", 
-                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
-            return -ENODEV;
-        }
-
-        amd_iommu_disable_domain_device(source, iommu, bdf);
-        /* Move pci device from the source domain to target domain. */
-        list_move(&pdev->domain_list, &target->arch.pdev_list);
-
-        amd_iommu_setup_domain_device(target, iommu, bdf);
-        amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
+    amd_iommu_setup_domain_device(target, iommu, bdf);
+    amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                  source->domain_id, target->domain_id);
 
-        break;
-    }
+    spin_unlock(&pdev->lock);
     return 0;
 }
 
@@ -552,14 +552,16 @@
 static void release_domain_devices(struct domain *d)
 {
     struct pci_dev *pdev;
+    u8 bus, devfn;
 
-    while ( has_arch_pdevs(d) )
+    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
     {
-        pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
         pdev_flr(pdev->bus, pdev->devfn);
+       bus = pdev->bus; devfn = pdev->devfn;
+       spin_unlock(&pdev->lock);
         amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id,
-                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
-        reassign_device(d, dom0, pdev->bus, pdev->devfn);
+                    bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        reassign_device(d, dom0, bus, devfn);
     }
 }
 
@@ -619,11 +621,11 @@
     release_domain_devices(d);
 }
 
-static void amd_iommu_return_device(
+static int amd_iommu_return_device(
     struct domain *s, struct domain *t, u8 bus, u8 devfn)
 {
     pdev_flr(bus, devfn);
-    reassign_device(s, t, bus, devfn);
+    return reassign_device(s, t, bus, devfn);
 }
 
 static int amd_iommu_group_id(u8 bus, u8 devfn)
diff -r 5b0699fb81a5 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/drivers/passthrough/iommu.c   Fri Jul 04 17:19:39 2008 +0100
@@ -240,6 +240,7 @@
 
     group_id = ops->get_device_group_id(bus, devfn);
 
+    read_lock(&pcidevs_lock);
     for_each_pdev( d, pdev )
     {
         if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
@@ -252,10 +253,14 @@
             bdf |= (pdev->bus & 0xff) << 16;
             bdf |= (pdev->devfn & 0xff) << 8;
             if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
+            {
+                read_unlock(&pcidevs_lock);
                 return -1;
+            }
             i++;
         }
     }
+    read_unlock(&pcidevs_lock);
 
     return i;
 }
diff -r 5b0699fb81a5 xen/drivers/passthrough/pci.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/passthrough/pci.c     Fri Jul 04 17:19:39 2008 +0100
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2008,  Netronome Systems, Inc.
+ *                
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/sched.h>
+#include <xen/pci.h>
+#include <xen/list.h>
+#include <xen/prefetch.h>
+#include <xen/keyhandler.h>
+
+
+LIST_HEAD(alldevs_list);
+rwlock_t pcidevs_lock = RW_LOCK_UNLOCKED;
+
+struct pci_dev *alloc_pdev(u8 bus, u8 devfn)
+{
+    struct pci_dev *pdev;
+
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+        if ( pdev->bus == bus && pdev->devfn == devfn )
+           return pdev;
+
+    pdev = xmalloc(struct pci_dev);
+    if ( !pdev )
+       return NULL;
+
+    *((u8*) &pdev->bus) = bus;
+    *((u8*) &pdev->devfn) = devfn;
+    pdev->domain = NULL;
+    spin_lock_init(&pdev->lock);
+    INIT_LIST_HEAD(&pdev->msi_list);
+    list_add(&pdev->alldevs_list, &alldevs_list);
+
+    return pdev;
+}
+
+void free_pdev(struct pci_dev *pdev)
+{
+    list_del(&pdev->alldevs_list);
+    xfree(pdev);
+}
+
+struct pci_dev *pci_lock_pdev(int bus, int devfn)
+{
+    struct pci_dev *pdev;
+
+    read_lock(&pcidevs_lock);
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+        if ( (pdev->bus == bus || bus == -1) &&
+            (pdev->devfn == devfn || devfn == -1) )
+       {
+           spin_lock(&pdev->lock);
+           read_unlock(&pcidevs_lock);
+           return pdev;
+       }
+    read_unlock(&pcidevs_lock);
+
+    return NULL;
+}
+
+struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
+{
+    struct pci_dev *pdev;
+
+    read_lock(&pcidevs_lock);
+    list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
+    {
+       spin_lock(&pdev->lock);
+        if ( (pdev->bus == bus || bus == -1) &&
+            (pdev->devfn == devfn || devfn == -1) &&
+            (pdev->domain == d) )
+       {
+           read_unlock(&pcidevs_lock);
+           return pdev;
+       }
+       spin_unlock(&pdev->lock);
+    }
+    read_unlock(&pcidevs_lock);
+
+    return NULL;
+}
+
+static void dump_pci_devices(unsigned char ch)
+{
+    struct pci_dev *pdev;
+    struct msi_desc *msi;
+
+    printk("==== PCI devices ====\n");
+    read_lock(&pcidevs_lock);
+
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+    {
+       spin_lock(&pdev->lock);
+        printk("%02x:%02x.%x - dom %-3d - MSIs < ",
+               pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+               pdev->domain ? pdev->domain->domain_id : -1);
+       list_for_each_entry ( msi, &pdev->msi_list, list )
+           printk("%d ", msi->vector);
+       printk(">\n");
+       spin_unlock(&pdev->lock);
+    }
+
+    read_unlock(&pcidevs_lock);
+}
+
+static int __init setup_dump_pcidevs(void)
+{
+    register_keyhandler('P', dump_pci_devices, "dump PCI devices");
+    return 0;
+}
+__initcall(setup_dump_pcidevs);
diff -r 5b0699fb81a5 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Fri Jul 04 17:19:39 2008 +0100
@@ -1345,7 +1345,7 @@
     return ret;
 }
 
-void reassign_device_ownership(
+static int reassign_device_ownership(
     struct domain *source,
     struct domain *target,
     u8 bus, u8 devfn)
@@ -1353,61 +1353,62 @@
     struct hvm_iommu *source_hd = domain_hvm_iommu(source);
     struct pci_dev *pdev;
     struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
-    int status;
-    int found = 0;
+    struct iommu *pdev_iommu;
+    int ret, found = 0;
+
+    if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
+        return -ENODEV;
 
     pdev_flr(bus, devfn);
-
-    for_each_pdev( source, pdev )
-        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
-            goto found;
-
-    return;
-
-found:
     drhd = acpi_find_matched_drhd_unit(bus, devfn);
-    iommu = drhd->iommu;
+    pdev_iommu = drhd->iommu;
     domain_context_unmap(bus, devfn);
 
-    /* Move pci device from the source domain to target domain. */
+    write_lock(&pcidevs_lock);
     list_move(&pdev->domain_list, &target->arch.pdev_list);
+    write_unlock(&pcidevs_lock);
+    pdev->domain = target;
 
+    ret = domain_context_mapping(target, bus, devfn);
+    spin_unlock(&pdev->lock);
+
+    read_lock(&pcidevs_lock);
     for_each_pdev ( source, pdev )
     {
         drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
-        if ( drhd->iommu == iommu )
+        if ( drhd->iommu == pdev_iommu )
         {
             found = 1;
             break;
         }
     }
+    read_unlock(&pcidevs_lock);
 
     if ( !found )
-        clear_bit(iommu->index, &source_hd->iommu_bitmap);
+        clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
 
-    status = domain_context_mapping(target, bus, devfn);
-    if ( status != 0 )
-        gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
+    return ret;
 }
 
 void return_devices_to_dom0(struct domain *d)
 {
     struct pci_dev *pdev;
 
-    while ( has_arch_pdevs(d) )
+    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
     {
-        pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
-        pci_cleanup_msi(pdev->bus, pdev->devfn);
+        pci_cleanup_msi(pdev);
+        spin_unlock(&pdev->lock);
         reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
     }
 
 #ifdef VTD_DEBUG
+    read_lock(&pcidevs_lock);
     for_each_pdev ( dom0, pdev )
         dprintk(XENLOG_INFO VTDPREFIX,
                 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
                 dom0->domain_id, pdev->bus,
                 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+    read_unlock(&pcidevs_lock);
 #endif
 }
 
@@ -1568,11 +1569,7 @@
         return ret;
 
     if ( domain_context_mapped(bus, devfn) == 0 )
-    {
         ret = domain_context_mapping(d, bus, devfn);
-        if ( !ret )
-            return 0;
-    }
 
     return ret;
 }
@@ -1586,6 +1583,7 @@
 
     hd = domain_hvm_iommu(d);
 
+    write_lock(&pcidevs_lock);
     for ( bus = 0; bus < 256; bus++ )
     {
         for ( dev = 0; dev < 32; dev++ )
@@ -1597,10 +1595,10 @@
                 if ( (l == 0xffffffff) || (l == 0x00000000) ||
                      (l == 0x0000ffff) || (l == 0xffff0000) )
                     continue;
-                pdev = xmalloc(struct pci_dev);
-                pdev->bus = bus;
-                pdev->devfn = PCI_DEVFN(dev, func);
-                list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
+
+                pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
+                pdev->domain = d;
+                list_add(&pdev->domain_list, &d->arch.pdev_list);
 
                 ret = domain_context_mapping(d, pdev->bus, pdev->devfn);
                 if ( ret != 0 )
@@ -1609,6 +1607,7 @@
             }
         }
     }
+    write_unlock(&pcidevs_lock);
 }
 
 void clear_fault_bits(struct iommu *iommu)
@@ -1737,9 +1736,11 @@
 {
     struct pci_dev *pdev;
 
-    for_each_pdev( dom0, pdev )
-        if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
-            return 0;
+    if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
+    {
+        spin_unlock(&pdev->lock);
+        return 0;
+    }
 
     return 1;
 }
@@ -1751,9 +1752,11 @@
     u16 bdf;
 
     if ( list_empty(&acpi_drhd_units) )
+        return -ENODEV;
+
+    ret = reassign_device_ownership(dom0, d, bus, devfn);
+    if ( ret )
         return ret;
-
-    reassign_device_ownership(dom0, d, bus, devfn);
 
     /* Setup rmrr identity mapping */
     for_each_rmrr_device( rmrr, bdf, i )
diff -r 5b0699fb81a5 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/include/asm-x86/msi.h Fri Jul 04 17:19:39 2008 +0100
@@ -63,12 +63,10 @@
 /* Helper functions */
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
-extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
-extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
 extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
 extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi);
 extern void pci_disable_msi(int vector);
-extern void pci_cleanup_msi(u8 bus, u8 devfn);
+extern void pci_cleanup_msi(struct pci_dev *pdev);
 
 struct msi_desc {
        struct {
diff -r 5b0699fb81a5 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/include/xen/iommu.h   Fri Jul 04 17:19:39 2008 +0100
@@ -56,6 +56,8 @@
     struct intel_iommu *intel;
 };
 
+int iommu_add_device(u8 bus, u8 devfn);
+void iommu_remove_device(u8 bus, u8 devfn);
 int iommu_domain_init(struct domain *d);
 void iommu_domain_destroy(struct domain *d);
 int device_assigned(u8 bus, u8 devfn);
@@ -63,9 +65,6 @@
 void deassign_device(struct domain *d, u8 bus, u8 devfn);
 int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
     XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
-void reassign_device_ownership(struct domain *source,
-                               struct domain *target,
-                               u8 bus, u8 devfn);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
 void iommu_domain_teardown(struct domain *d);
@@ -99,8 +98,8 @@
     void (*teardown)(struct domain *d);
     int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn);
     int (*unmap_page)(struct domain *d, unsigned long gfn);
-    void (*reassign_device)(struct domain *s, struct domain *t,
-                            u8 bus, u8 devfn);
+    int (*reassign_device)(struct domain *s, struct domain *t,
+                          u8 bus, u8 devfn);
     int (*get_device_group_id)(u8 bus, u8 devfn);
 };
 
diff -r 5b0699fb81a5 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/include/xen/pci.h     Fri Jul 04 17:19:39 2008 +0100
@@ -10,6 +10,7 @@
 #include <xen/config.h>
 #include <xen/types.h>
 #include <xen/list.h>
+#include <xen/spinlock.h>
 
 /*
  * The PCI interface treats multi-function devices as independent
@@ -29,15 +30,31 @@
 #define PCI_BDF2(b,df)  (((b & 0xff) << 8) | (df & 0xff))
 
 struct pci_dev {
+    struct list_head alldevs_list;
     struct list_head domain_list;
-    struct list_head msi_dev_list;
-    u8 bus;
-    u8 devfn;
     struct list_head msi_list;
+    struct domain *domain;
+    const u8 bus;
+    const u8 devfn;
+    spinlock_t lock;
 };
 
 #define for_each_pdev(domain, pdev) \
     list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list)
+
+/*
+ * The pcidevs_lock write-lock must be held when doing alloc_pdev() or
+ * free_pdev().  Never de-reference pdev without holding pdev->lock or
+ * pcidevs_lock.  Always acquire pcidevs_lock before pdev->lock when
+ * doing free_pdev().
+ */
+
+extern rwlock_t pcidevs_lock;
+
+struct pci_dev *alloc_pdev(u8 bus, u8 devfn);
+void free_pdev(struct pci_dev *pdev);
+struct pci_dev *pci_lock_pdev(int bus, int devfn);
+struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
 
 
 uint8_t pci_conf_read8(

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel