[Xen-changelog] [xen-unstable] Re-enable MSI support

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Re-enable MSI support
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 16 Dec 2008 22:30:42 -0800
Delivery-date: Tue, 16 Dec 2008 22:31:44 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1228996099 0
# Node ID 2941b1a97c6015aa5618373e4b280dfc88c16784
# Parent  c15244125a693d2a1ae5e5745a649467394d8dac
Re-enable MSI support

MSI support is currently disabled because of a locking issue. This patch
cleans up the MSI-related locking so that MSI support can be re-enabled.

Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
---
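
A minimal sketch of the lock ordering this patch establishes on the MSI
map path (the caller below is a hypothetical, trimmed stand-in for
physdev_map_pirq(); names otherwise match the patch):

    /* Sketch only: pirq/vector allocation and error handling omitted. */
    static int map_msi_pirq_sketch(struct domain *d, int pirq, int vector,
                                   struct msi_info *msi)
    {
        int ret;

        read_lock(&pcidevs_lock);     /* protects pdev and msi_desc lists */
        spin_lock(&d->event_lock);    /* protects pirq <-> vector mappings */

        /* map_domain_pirq() now ASSERTs that both locks are held and takes
         * irq_desc[vector].lock itself, only around the irq_desc updates. */
        ret = map_domain_pirq(d, pirq, vector, MAP_PIRQ_TYPE_MSI, msi);

        spin_unlock(&d->event_lock);
        read_unlock(&pcidevs_lock);

        return ret;
    }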
 xen/arch/x86/domctl.c                       |   17 --
 xen/arch/x86/irq.c                          |   59 +++++---
 xen/arch/x86/msi.c                          |  175 ++++++++++-------------
 xen/arch/x86/physdev.c                      |    6 
 xen/arch/x86/x86_64/asm-offsets.c           |    2 
 xen/common/domain.c                         |    8 -
 xen/drivers/passthrough/amd/pci_amd_iommu.c |   16 +-
 xen/drivers/passthrough/iommu.c             |   58 +++++--
 xen/drivers/passthrough/pci.c               |   73 ++++-----
 xen/drivers/passthrough/vtd/iommu.c         |  206 ++++++++++++++++------------
 xen/include/asm-x86/msi.h                   |    8 -
 xen/include/xen/iommu.h                     |    2 
 xen/include/xen/pci.h                       |    3 
 13 files changed, 337 insertions(+), 296 deletions(-)

diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/domctl.c     Thu Dec 11 11:48:19 2008 +0000
@@ -665,14 +665,6 @@ long arch_do_domctl(
         }
 
         ret = -EINVAL;
-        if ( device_assigned(bus, devfn) )
-        {
-            gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
-                     "%x:%x:%x already assigned, or non-existent\n",
-                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-            put_domain(d);
-            break;
-        }
 
         ret = assign_device(d, bus, devfn);
         if ( ret )
@@ -715,15 +707,8 @@ long arch_do_domctl(
             put_domain(d);
             break;
         }
-
-        if ( !device_assigned(bus, devfn) )
-        {
-            put_domain(d);
-            break;
-        }
-
         ret = 0;
-        deassign_device(d, bus, devfn);
+        ret = deassign_device(d, bus, devfn);
         gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 
diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/irq.c        Thu Dec 11 11:48:19 2008 +0000
@@ -847,12 +847,11 @@ int map_domain_pirq(
     int old_vector, old_pirq;
     irq_desc_t *desc;
     unsigned long flags;
-
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev = NULL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(spin_is_locked(&d->event_lock));
-
-    /* XXX Until pcidev and msi locking is fixed. */
-    if ( type == MAP_PIRQ_TYPE_MSI )
-        return -EINVAL;
 
     if ( !IS_PRIV(current->domain) )
         return -EPERM;
@@ -884,25 +883,35 @@ int map_domain_pirq(
     }
 
     desc = &irq_desc[vector];
-    spin_lock_irqsave(&desc->lock, flags);
 
     if ( type == MAP_PIRQ_TYPE_MSI )
     {
         struct msi_info *msi = (struct msi_info *)data;
+
+        pdev = pci_get_pdev(msi->bus, msi->devfn);
+        ret = pci_enable_msi(msi, &msi_desc);
+        if ( ret )
+            goto done;
+
+        spin_lock_irqsave(&desc->lock, flags);
+
         if ( desc->handler != &no_irq_type )
             dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n",
-                    d->domain_id, vector);
+              d->domain_id, vector);
         desc->handler = &pci_msi_type;
-        ret = pci_enable_msi(msi);
-        if ( ret )
-            goto done;
-    }
-
-    d->arch.pirq_vector[pirq] = vector;
-    d->arch.vector_pirq[vector] = pirq;
+        d->arch.pirq_vector[pirq] = vector;
+        d->arch.vector_pirq[vector] = pirq;
+        setup_msi_irq(pdev, msi_desc);
+        spin_unlock_irqrestore(&desc->lock, flags);
+    } else
+    {
+        spin_lock_irqsave(&desc->lock, flags);
+        d->arch.pirq_vector[pirq] = vector;
+        d->arch.vector_pirq[vector] = pirq;
+        spin_unlock_irqrestore(&desc->lock, flags);
+    }
 
  done:
-    spin_unlock_irqrestore(&desc->lock, flags);
     return ret;
 }
 
@@ -913,6 +922,7 @@ int unmap_domain_pirq(struct domain *d, 
     irq_desc_t *desc;
     int vector, ret = 0;
     bool_t forced_unbind;
+    struct msi_desc *msi_desc = NULL;
 
     if ( (pirq < 0) || (pirq >= NR_IRQS) )
         return -EINVAL;
@@ -920,6 +930,7 @@ int unmap_domain_pirq(struct domain *d, 
     if ( !IS_PRIV(current->domain) )
         return -EINVAL;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(spin_is_locked(&d->event_lock));
 
     vector = d->arch.pirq_vector[pirq];
@@ -937,18 +948,19 @@ int unmap_domain_pirq(struct domain *d, 
                 d->domain_id, pirq);
 
     desc = &irq_desc[vector];
+
+    if ( (msi_desc = desc->msi_desc) != NULL )
+        pci_disable_msi(msi_desc);
+
     spin_lock_irqsave(&desc->lock, flags);
 
     BUG_ON(vector != d->arch.pirq_vector[pirq]);
 
-    if ( desc->msi_desc )
-        pci_disable_msi(vector);
+    if ( msi_desc )
+        teardown_msi_vector(vector);
 
     if ( desc->handler == &pci_msi_type )
-    {
         desc->handler = &no_irq_type;
-        free_irq_vector(vector);
-    }
 
     if ( !forced_unbind )
     {
@@ -962,6 +974,11 @@ int unmap_domain_pirq(struct domain *d, 
     }
 
     spin_unlock_irqrestore(&desc->lock, flags);
+    if (msi_desc)
+    {
+        msi_free_vector(msi_desc);
+        free_irq_vector(vector);
+    }
 
     ret = irq_deny_access(d, pirq);
     if ( ret )
@@ -976,6 +993,7 @@ void free_domain_pirqs(struct domain *d)
 {
     int i;
 
+    read_lock(&pcidevs_lock);
     spin_lock(&d->event_lock);
 
     for ( i = 0; i < NR_IRQS; i++ )
@@ -983,6 +1001,7 @@ void free_domain_pirqs(struct domain *d)
             unmap_domain_pirq(d, i);
 
     spin_unlock(&d->event_lock);
+    read_unlock(&pcidevs_lock);
 }
 
 extern void dump_ioapic_irq_info(void);
diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/msi.c        Thu Dec 11 11:48:19 2008 +0000
@@ -153,6 +153,8 @@ static int set_vector_msi(struct msi_des
 
 static int unset_vector_msi(int vector)
 {
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
+
     if ( vector >= NR_VECTORS )
     {
         dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
@@ -161,6 +163,7 @@ static int unset_vector_msi(int vector)
     }
 
     irq_desc[vector].msi_desc = NULL;
+
     return 0;
 }
 
@@ -228,14 +231,12 @@ void set_msi_affinity(unsigned int vecto
         return;
 
     ASSERT(spin_is_locked(&irq_desc[vector].lock));
-    spin_lock(&desc->dev->lock);
     read_msi_msg(desc, &msg);
 
     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
     msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
     write_msi_msg(desc, &msg);
-    spin_unlock(&desc->dev->lock);
 }
 
 static void msi_set_enable(struct pci_dev *dev, int enable)
@@ -369,7 +370,7 @@ static struct msi_desc* alloc_msi_entry(
     return entry;
 }
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
     struct msi_msg msg;
 
@@ -380,19 +381,13 @@ static int setup_msi_irq(struct pci_dev 
     return 0;
 }
 
-static void teardown_msi_vector(int vector)
+void teardown_msi_vector(int vector)
 {
     unset_vector_msi(vector);
 }
 
-static void msi_free_vector(int vector)
-{
-    struct msi_desc *entry;
-
-    ASSERT(spin_is_locked(&irq_desc[vector].lock));
-    entry = irq_desc[vector].msi_desc;
-    teardown_msi_vector(vector);
-
+int msi_free_vector(struct msi_desc *entry)
+{
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
     {
         unsigned long start;
@@ -407,6 +402,7 @@ static void msi_free_vector(int vector)
     }
     list_del(&entry->list);
     xfree(entry);
+    return 0;
 }
 
 static struct msi_desc *find_msi_entry(struct pci_dev *dev,
@@ -433,15 +429,18 @@ static struct msi_desc *find_msi_entry(s
  * multiple messages. A return of zero indicates the successful setup
  * of an entry zero with the new MSI irq or non-zero for otherwise.
  **/
-static int msi_capability_init(struct pci_dev *dev, int vector)
+static int msi_capability_init(struct pci_dev *dev,
+                               int vector,
+                               struct msi_desc **desc)
 {
     struct msi_desc *entry;
-    int pos, ret;
+    int pos;
     u16 control;
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
     pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSI);
     control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
     /* MSI Entry Initialization */
@@ -477,14 +476,7 @@ static int msi_capability_init(struct pc
     }
     list_add_tail(&entry->list, &dev->msi_list);
 
-    /* Configure MSI capability structure */
-    ret = setup_msi_irq(dev, entry);
-    if ( ret )
-    {
-        msi_free_vector(vector);
-        return ret;
-    }
-
+    *desc = entry;
     /* Restore the original MSI enabled bits  */
     pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
 
@@ -501,7 +493,9 @@ static int msi_capability_init(struct pc
  * single MSI-X irq. A return of zero indicates the successful setup of
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
  **/
-static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi)
+static int msix_capability_init(struct pci_dev *dev,
+                                struct msi_info *msi,
+                                struct msi_desc **desc)
 {
     struct msi_desc *entry;
     int pos;
@@ -515,6 +509,9 @@ static int msix_capability_init(struct p
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    ASSERT(desc);
+
     pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
     control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
     msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
@@ -550,9 +547,13 @@ static int msix_capability_init(struct p
 
     list_add_tail(&entry->list, &dev->msi_list);
 
-    setup_msi_irq(dev, entry);
-
-    /* Set MSI-X enabled bits */
+    /* Mask interrupt here */
+    writel(1, entry->mask_base + entry->msi_attrib.entry_nr
+                * PCI_MSIX_ENTRY_SIZE
+                + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+
+    *desc = entry;
+    /* Restore MSI-X enabled bits */
     pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
 
     return 0;
@@ -568,45 +569,35 @@ static int msix_capability_init(struct p
  * indicates the successful setup of an entry zero with the new MSI
  * irq or non-zero for otherwise.
  **/
-static int __pci_enable_msi(struct msi_info *msi)
+static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
 {
     int status;
     struct pci_dev *pdev;
 
-    pdev = pci_lock_pdev(msi->bus, msi->devfn);
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    pdev = pci_get_pdev(msi->bus, msi->devfn);
     if ( !pdev )
         return -ENODEV;
 
     if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) )
     {
-        spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on "
                 "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         return 0;
     }
 
-    status = msi_capability_init(pdev, msi->vector);
-    spin_unlock(&pdev->lock);
+    status = msi_capability_init(pdev, msi->vector, desc);
     return status;
 }
 
-static void __pci_disable_msi(int vector)
-{
-    struct msi_desc *entry;
+static void __pci_disable_msi(struct msi_desc *entry)
+{
     struct pci_dev *dev;
     int pos;
     u16 control;
     u8 bus, slot, func;
 
-    entry = irq_desc[vector].msi_desc;
-    if ( !entry )
-        return;
-    /*
-     * Lock here is safe.  msi_desc can not be removed without holding
-     * both irq_desc[].lock (which we do) and pdev->lock.
-     */
-    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -618,10 +609,6 @@ static void __pci_disable_msi(int vector
 
     BUG_ON(list_empty(&dev->msi_list));
 
-    msi_free_vector(vector);
-
-    pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
-    spin_unlock(&dev->lock);
 }
 
 /**
@@ -639,7 +626,7 @@ static void __pci_disable_msi(int vector
  * of irqs available. Driver should use the returned value to re-send
  * its request.
  **/
-static int __pci_enable_msix(struct msi_info *msi)
+static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
 {
     int status, pos, nr_entries;
     struct pci_dev *pdev;
@@ -647,7 +634,8 @@ static int __pci_enable_msix(struct msi_
     u8 slot = PCI_SLOT(msi->devfn);
     u8 func = PCI_FUNC(msi->devfn);
 
-    pdev = pci_lock_pdev(msi->bus, msi->devfn);
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    pdev = pci_get_pdev(msi->bus, msi->devfn);
     if ( !pdev )
         return -ENODEV;
 
@@ -655,41 +643,27 @@ static int __pci_enable_msix(struct msi_
     control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos));
     nr_entries = multi_msix_capable(control);
     if (msi->entry_nr >= nr_entries)
-    {
-        spin_unlock(&pdev->lock);
         return -EINVAL;
-    }
 
     if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) )
     {
-        spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on "
                 "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         return 0;
     }
 
-    status = msix_capability_init(pdev, msi);
-    spin_unlock(&pdev->lock);
+    status = msix_capability_init(pdev, msi, desc);
     return status;
 }
 
-static void __pci_disable_msix(int vector)
-{
-    struct msi_desc *entry;
+static void __pci_disable_msix(struct msi_desc *entry)
+{
     struct pci_dev *dev;
     int pos;
     u16 control;
     u8 bus, slot, func;
 
-    entry = irq_desc[vector].msi_desc;
-    if ( !entry )
-        return;
-    /*
-     * Lock here is safe.  msi_desc can not be removed without holding
-     * both irq_desc[].lock (which we do) and pdev->lock.
-     */
-    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -701,50 +675,51 @@ static void __pci_disable_msix(int vecto
 
     BUG_ON(list_empty(&dev->msi_list));
 
-    msi_free_vector(vector);
+    writel(1, entry->mask_base + entry->msi_attrib.entry_nr
+      * PCI_MSIX_ENTRY_SIZE
+      + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
 
     pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
-    spin_unlock(&dev->lock);
-}
-
-int pci_enable_msi(struct msi_info *msi)
-{
-    ASSERT(spin_is_locked(&irq_desc[msi->vector].lock));
-
-    return  msi->table_base ? __pci_enable_msix(msi) :
-        __pci_enable_msi(msi);
-}
-
-void pci_disable_msi(int vector)
-{
-    irq_desc_t *desc = &irq_desc[vector];
-    ASSERT(spin_is_locked(&desc->lock));
-    if ( !desc->msi_desc )
-        return;
-
-    if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
-        __pci_disable_msi(vector);
-    else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
-        __pci_disable_msix(vector);
+}
+
+/*
+ * Notice: only construct the msi_desc
+ * no change to irq_desc here, and the interrupt is masked
+ */
+int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
+{
+    ASSERT(spin_is_locked(&pcidevs_lock));
+
+    return  msi->table_base ? __pci_enable_msix(msi, desc) :
+        __pci_enable_msi(msi, desc);
+}
+
+/*
+ * Device only, no irq_desc
+ */
+void pci_disable_msi(struct msi_desc *msi_desc)
+{
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+        __pci_disable_msi(msi_desc);
+    else if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
+        __pci_disable_msix(msi_desc);
 }
 
 static void msi_free_vectors(struct pci_dev* dev)
 {
     struct msi_desc *entry, *tmp;
     irq_desc_t *desc;
-    unsigned long flags;
-
- retry:
+    unsigned long flags, vector;
+
     list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
     {
-        desc = &irq_desc[entry->vector];
-
-        local_irq_save(flags);
-        if ( !spin_trylock(&desc->lock) )
-        {
-            local_irq_restore(flags);
-            goto retry;
-        }
+        vector = entry->vector;
+        desc = &irq_desc[vector];
+        pci_disable_msi(entry);
+
+        spin_lock_irqsave(&desc->lock, flags);
+
+        teardown_msi_vector(vector);
 
         if ( desc->handler == &pci_msi_type )
         {
@@ -753,8 +728,8 @@ static void msi_free_vectors(struct pci_
             desc->handler = &no_irq_type;
         }
 
-        msi_free_vector(entry->vector);
         spin_unlock_irqrestore(&desc->lock, flags);
+        msi_free_vector(entry);
     }
 }
 
diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/physdev.c    Thu Dec 11 11:48:19 2008 +0000
@@ -100,6 +100,7 @@ static int physdev_map_pirq(struct physd
             goto free_domain;
     }
 
+    read_lock(&pcidevs_lock);
     /* Verify or get pirq. */
     spin_lock(&d->event_lock);
     if ( map->pirq < 0 )
@@ -147,6 +148,7 @@ static int physdev_map_pirq(struct physd
 
 done:
     spin_unlock(&d->event_lock);
+    read_unlock(&pcidevs_lock);
     if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) && (map->index == -1) )
         free_irq_vector(vector);
 free_domain:
@@ -170,9 +172,11 @@ static int physdev_unmap_pirq(struct phy
     if ( d == NULL )
         return -ESRCH;
 
+    read_lock(&pcidevs_lock);
     spin_lock(&d->event_lock);
     ret = unmap_domain_pirq(d, unmap->pirq);
     spin_unlock(&d->event_lock);
+    read_unlock(&pcidevs_lock);
 
     rcu_unlock_domain(d);
 
@@ -341,10 +345,12 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
         irq_op.vector = assign_irq_vector(irq);
 
+        read_lock(&pcidevs_lock);
         spin_lock(&dom0->event_lock);
         ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector,
                               MAP_PIRQ_TYPE_GSI, NULL);
         spin_unlock(&dom0->event_lock);
+        read_unlock(&pcidevs_lock);
 
         if ( copy_to_guest(arg, &irq_op, 1) != 0 )
             ret = -EFAULT;
diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/x86_64/asm-offsets.c Thu Dec 11 11:48:19 2008 +0000
@@ -60,6 +60,8 @@ void __dummy__(void)
     DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs));
     BLANK();
 
+    OFFSET(irq_caps_offset, struct domain, irq_caps);
+    OFFSET(next_in_list_offset, struct domain, next_in_list);
     OFFSET(VCPU_processor, struct vcpu, processor);
     OFFSET(VCPU_domain, struct vcpu, domain);
     OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
diff -r c15244125a69 -r 2941b1a97c60 xen/common/domain.c
--- a/xen/common/domain.c       Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/common/domain.c       Thu Dec 11 11:48:19 2008 +0000
@@ -558,11 +558,11 @@ static void complete_domain_destroy(stru
         sched_destroy_vcpu(v);
     }
 
+    grant_table_destroy(d);
+
+    arch_domain_destroy(d);
+
     rangeset_domain_destroy(d);
-
-    grant_table_destroy(d);
-
-    arch_domain_destroy(d);
 
     sched_destroy_domain(d);
 
diff -r c15244125a69 -r 2941b1a97c60 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Thu Dec 11 11:48:19 2008 +0000
@@ -282,9 +282,13 @@ static int reassign_device( struct domai
     struct amd_iommu *iommu;
     int bdf;
 
-    pdev = pci_lock_domain_pdev(source, bus, devfn);
+    read_lock(&pcidevs_lock);
+    pdev = pci_get_pdev_by_domain(source, bus, devfn);
     if ( !pdev )
-       return -ENODEV;
+    {
+        read_unlock(&pcidevs_lock);
+        return -ENODEV;
+    }
 
     bdf = (bus << 8) | devfn;
     /* supported device? */
@@ -293,8 +297,8 @@ static int reassign_device( struct domai
 
     if ( !iommu )
     {
-       spin_unlock(&pdev->lock);
-       amd_iov_error("Fail to find iommu."
+        read_unlock(&pcidevs_lock);
+        amd_iov_error("Fail to find iommu."
                      " %x:%x.%x cannot be assigned to domain %d\n", 
                      bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
        return -ENODEV;
@@ -302,9 +306,7 @@ static int reassign_device( struct domai
 
     amd_iommu_disable_domain_device(source, iommu, bdf);
 
-    write_lock(&pcidevs_lock);
     list_move(&pdev->domain_list, &target->arch.pdev_list);
-    write_unlock(&pcidevs_lock);
     pdev->domain = target;
 
     amd_iommu_setup_domain_device(target, iommu, bdf);
@@ -312,7 +314,7 @@ static int reassign_device( struct domai
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                  source->domain_id, target->domain_id);
 
-    spin_unlock(&pdev->lock);
+    read_unlock(&pcidevs_lock);
     return 0;
 }
 
diff -r c15244125a69 -r 2941b1a97c60 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/drivers/passthrough/iommu.c   Thu Dec 11 11:48:19 2008 +0000
@@ -83,8 +83,11 @@ int iommu_add_device(struct pci_dev *pde
 int iommu_add_device(struct pci_dev *pdev)
 {
     struct hvm_iommu *hd;
+
     if ( !pdev->domain )
         return -EINVAL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
 
     hd = domain_hvm_iommu(pdev->domain);
     if ( !iommu_enabled || !hd->platform_ops )
@@ -109,20 +112,24 @@ int assign_device(struct domain *d, u8 b
 int assign_device(struct domain *d, u8 bus, u8 devfn)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
-    int rc;
-
-    if ( !iommu_enabled || !hd->platform_ops )
-        return 0;
-
+    int rc = 0;
+
+    if ( !iommu_enabled || !hd->platform_ops )
+        return 0;
+
+    read_lock(&pcidevs_lock);
     if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) )
-        return rc;
+        goto done;
 
     if ( has_arch_pdevs(d) && !is_hvm_domain(d) && !need_iommu(d) )
     {
         d->need_iommu = 1;
-        return iommu_populate_page_table(d);
-    }
-    return 0;
+        rc = iommu_populate_page_table(d);
+        goto done;
+    }
+done:    
+    read_unlock(&pcidevs_lock);
+    return rc;
 }
 
 static int iommu_populate_page_table(struct domain *d)
@@ -204,12 +211,29 @@ int iommu_unmap_page(struct domain *d, u
     return hd->platform_ops->unmap_page(d, gfn);
 }
 
-void deassign_device(struct domain *d, u8 bus, u8 devfn)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(d);
-
-    if ( !iommu_enabled || !hd->platform_ops )
-        return;
+int  deassign_device(struct domain *d, u8 bus, u8 devfn)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev = NULL;
+
+    if ( !iommu_enabled || !hd->platform_ops )
+        return -EINVAL;
+
+    read_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(bus, devfn);
+    if (!pdev)
+    {
+        read_unlock(&pcidevs_lock);
+        return -ENODEV;
+    }
+
+    if (pdev->domain != d)
+    {
+        read_unlock(&pcidevs_lock);
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                "IOMMU: deassign a device not owned\n");
+       return -EINVAL;
+    }
 
     hd->platform_ops->reassign_device(d, dom0, bus, devfn);
 
@@ -218,6 +242,10 @@ void deassign_device(struct domain *d, u
         d->need_iommu = 0;
         hd->platform_ops->teardown(d);
     }
+
+    read_unlock(&pcidevs_lock);
+
+    return 0;
 }
 
 static int iommu_setup(void)
diff -r c15244125a69 -r 2941b1a97c60 xen/drivers/passthrough/pci.c
--- a/xen/drivers/passthrough/pci.c     Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/drivers/passthrough/pci.c     Thu Dec 11 11:48:19 2008 +0000
@@ -41,11 +41,11 @@ struct pci_dev *alloc_pdev(u8 bus, u8 de
     pdev = xmalloc(struct pci_dev);
     if ( !pdev )
         return NULL;
+    memset(pdev, 0, sizeof(struct pci_dev));
 
     *((u8*) &pdev->bus) = bus;
     *((u8*) &pdev->devfn) = devfn;
     pdev->domain = NULL;
-    spin_lock_init(&pdev->lock);
     INIT_LIST_HEAD(&pdev->msi_list);
     list_add(&pdev->alldevs_list, &alldevs_list);
 
@@ -58,42 +58,35 @@ void free_pdev(struct pci_dev *pdev)
     xfree(pdev);
 }
 
-struct pci_dev *pci_lock_pdev(int bus, int devfn)
-{
-    struct pci_dev *pdev;
-
-    read_lock(&pcidevs_lock);
+struct pci_dev *pci_get_pdev(int bus, int devfn)
+{
+    struct pci_dev *pdev = NULL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+
     list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
         if ( (pdev->bus == bus || bus == -1) &&
              (pdev->devfn == devfn || devfn == -1) )
-    {
-        spin_lock(&pdev->lock);
-        read_unlock(&pcidevs_lock);
-        return pdev;
-    }
-    read_unlock(&pcidevs_lock);
+        {
+            return pdev;
+        }
 
     return NULL;
 }
 
-struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
-{
-    struct pci_dev *pdev;
-
-    read_lock(&pcidevs_lock);
-    list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
-    {
-        spin_lock(&pdev->lock);
-        if ( (pdev->bus == bus || bus == -1) &&
-             (pdev->devfn == devfn || devfn == -1) &&
-             (pdev->domain == d) )
-        {
-            read_unlock(&pcidevs_lock);
-            return pdev;
-        }
-        spin_unlock(&pdev->lock);
-    }
-    read_unlock(&pcidevs_lock);
+struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn)
+{
+    struct pci_dev *pdev = NULL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+         if ( (pdev->bus == bus || bus == -1) &&
+              (pdev->devfn == devfn || devfn == -1) &&
+              (pdev->domain == d) )
+         {
+             return pdev;
+         }
 
     return NULL;
 }
@@ -109,24 +102,20 @@ int pci_add_device(u8 bus, u8 devfn)
         goto out;
 
     ret = 0;
-    spin_lock(&pdev->lock);
     if ( !pdev->domain )
     {
         pdev->domain = dom0;
         ret = iommu_add_device(pdev);
         if ( ret )
-        {
-            spin_unlock(&pdev->lock);
             goto out;
-        }
+
         list_add(&pdev->domain_list, &dom0->arch.pdev_list);
     }
-    spin_unlock(&pdev->lock);
+
+out:
+    write_unlock(&pcidevs_lock);
     printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
            PCI_SLOT(devfn), PCI_FUNC(devfn));
-
-out:
-    write_unlock(&pcidevs_lock);
     return ret;
 }
 
@@ -139,7 +128,6 @@ int pci_remove_device(u8 bus, u8 devfn)
     list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
         if ( pdev->bus == bus && pdev->devfn == devfn )
         {
-            spin_lock(&pdev->lock);
             ret = iommu_remove_device(pdev);
             if ( pdev->domain )
                 list_del(&pdev->domain_list);
@@ -199,14 +187,15 @@ void pci_release_devices(struct domain *
     struct pci_dev *pdev;
     u8 bus, devfn;
 
+    read_lock(&pcidevs_lock);
     pci_clean_dpci_irqs(d);
-    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
+    while ( (pdev = pci_get_pdev_by_domain(d, -1, -1)) )
     {
         pci_cleanup_msi(pdev);
         bus = pdev->bus; devfn = pdev->devfn;
-        spin_unlock(&pdev->lock);
         deassign_device(d, bus, devfn);
     }
+    read_unlock(&pcidevs_lock);
 }
 
 #ifdef SUPPORT_MSI_REMAPPING
@@ -220,14 +209,12 @@ static void dump_pci_devices(unsigned ch
 
     list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
     {
-        spin_lock(&pdev->lock);
         printk("%02x:%02x.%x - dom %-3d - MSIs < ",
                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                pdev->domain ? pdev->domain->domain_id : -1);
         list_for_each_entry ( msi, &pdev->msi_list, list )
                printk("%d ", msi->vector);
         printk(">\n");
-        spin_unlock(&pdev->lock);
     }
 
     read_unlock(&pcidevs_lock);
diff -r c15244125a69 -r 2941b1a97c60 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/drivers/passthrough/vtd/iommu.c       Thu Dec 11 11:48:19 2008 +0000
@@ -49,15 +49,14 @@ static void context_set_domain_id(struct
 static void context_set_domain_id(struct context_entry *context,
                                   struct domain *d)
 {
-    unsigned long flags;
     domid_t iommu_domid = domain_iommu_domid(d);
 
     if ( iommu_domid == 0 )
     {
-        spin_lock_irqsave(&domid_bitmap_lock, flags);
+        spin_lock(&domid_bitmap_lock);
         iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
         set_bit(iommu_domid, domid_bitmap);
-        spin_unlock_irqrestore(&domid_bitmap_lock, flags);
+        spin_unlock(&domid_bitmap_lock);
         d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
     }
 
@@ -140,10 +139,9 @@ static u64 bus_to_context_maddr(struct i
 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
 {
     struct root_entry *root, *root_entries;
-    unsigned long flags;
     u64 maddr;
 
-    spin_lock_irqsave(&iommu->lock, flags);
+    ASSERT(spin_is_locked(&iommu->lock));
     root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
     root = &root_entries[bus];
     if ( !root_present(*root) )
@@ -152,7 +150,6 @@ static u64 bus_to_context_maddr(struct i
         if ( maddr == 0 )
         {
             unmap_vtd_domain_page(root_entries);
-            spin_unlock_irqrestore(&iommu->lock, flags);
             return 0;
         }
         set_root_value(*root, maddr);
@@ -161,34 +158,7 @@ static u64 bus_to_context_maddr(struct i
     }
     maddr = (u64) get_context_addr(*root);
     unmap_vtd_domain_page(root_entries);
-    spin_unlock_irqrestore(&iommu->lock, flags);
     return maddr;
-}
-
-static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
-{
-    struct root_entry *root, *root_entries;
-    struct context_entry *context;
-    u64 context_maddr;
-    int ret;
-    unsigned long flags;
-
-    spin_lock_irqsave(&iommu->lock, flags);
-    root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
-    root = &root_entries[bus];
-    if ( !root_present(*root) )
-    {
-        ret = 0;
-        goto out;
-    }
-    context_maddr = get_context_addr(*root);
-    context = (struct context_entry *)map_vtd_domain_page(context_maddr);
-    ret = context_present(context[devfn]);
-    unmap_vtd_domain_page(context);
- out:
-    unmap_vtd_domain_page(root_entries);
-    spin_unlock_irqrestore(&iommu->lock, flags);
-    return ret;
 }
 
 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
@@ -198,12 +168,11 @@ static u64 addr_to_dma_page_maddr(struct
     struct dma_pte *parent, *pte = NULL;
     int level = agaw_to_level(hd->agaw);
     int offset;
-    unsigned long flags;
     u64 pte_maddr = 0, maddr;
     u64 *vaddr = NULL;
 
     addr &= (((u64)1) << addr_width) - 1;
-    spin_lock_irqsave(&hd->mapping_lock, flags);
+    ASSERT(spin_is_locked(&hd->mapping_lock));
     if ( hd->pgd_maddr == 0 )
         if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) )
             goto out;
@@ -252,7 +221,6 @@ static u64 addr_to_dma_page_maddr(struct
 
     unmap_vtd_domain_page(parent);
  out:
-    spin_unlock_irqrestore(&hd->mapping_lock, flags);
     return pte_maddr;
 }
 
@@ -536,22 +504,30 @@ static void dma_pte_clear_one(struct dom
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
 
+    spin_lock(&hd->mapping_lock);
     /* get last level pte */
     pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
     if ( pg_maddr == 0 )
+    {
+        spin_unlock(&hd->mapping_lock);
         return;
+    }
+
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     pte = page + address_level_offset(addr, 1);
 
     if ( !dma_pte_present(*pte) )
     {
+        spin_unlock(&hd->mapping_lock);
         unmap_vtd_domain_page(page);
         return;
     }
 
     dma_clear_pte(*pte); 
+    spin_unlock(&hd->mapping_lock);
     iommu_flush_cache_entry(pte);
 
+    /* No need pcidevs_lock here since do that on assign/deassign device*/
     for_each_drhd_unit ( drhd )
     {
         iommu = drhd->iommu;
@@ -598,16 +574,18 @@ static int iommu_set_root_entry(struct i
     unsigned long flags;
     s_time_t start_time;
 
-    spin_lock_irqsave(&iommu->register_lock, flags);
+    spin_lock(&iommu->lock);
 
     if ( iommu->root_maddr == 0 )
         iommu->root_maddr = alloc_pgtable_maddr(NULL);
     if ( iommu->root_maddr == 0 )
     {
-        spin_unlock_irqrestore(&iommu->register_lock, flags);
+        spin_unlock(&iommu->lock);
         return -ENOMEM;
     }
 
+    spin_unlock(&iommu->lock);
+    spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
     cmd = iommu->gcmd | DMA_GCMD_SRTP;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
@@ -742,9 +720,7 @@ static void iommu_page_fault(int vector,
     dprintk(XENLOG_WARNING VTDPREFIX,
             "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
 
-    spin_lock_irqsave(&iommu->register_lock, flags);
     fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
-    spin_unlock_irqrestore(&iommu->register_lock, flags);
 
     iommu_fault_status(fault_status);
 
@@ -1057,21 +1033,30 @@ static int domain_context_mapping_one(
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct context_entry *context, *context_entries;
-    unsigned long flags;
     u64 maddr, pgd_maddr;
+    struct pci_dev *pdev = NULL;
     int agaw;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    spin_lock(&iommu->lock);
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
 
     if ( context_present(*context) )
     {
+        int res = 0;
+
+        pdev = pci_get_pdev(bus, devfn);
+        if (!pdev)
+            res = -ENODEV;
+        else if (pdev->domain != domain)
+            res = -EINVAL;
         unmap_vtd_domain_page(context_entries);
-        return 0;
-    }
-
-    spin_lock_irqsave(&iommu->lock, flags);
+        spin_unlock(&iommu->lock);
+        return res;
+    }
+
     if ( iommu_passthrough &&
          ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
     {
@@ -1080,6 +1065,8 @@ static int domain_context_mapping_one(
     }
     else
     {
+        spin_lock(&hd->mapping_lock);
+
         /* Ensure we have pagetables allocated down to leaf PTE. */
         if ( hd->pgd_maddr == 0 )
         {
@@ -1087,8 +1074,9 @@ static int domain_context_mapping_one(
             if ( hd->pgd_maddr == 0 )
             {
             nomem:
+                spin_unlock(&hd->mapping_lock);
+                spin_unlock(&iommu->lock);
                 unmap_vtd_domain_page(context_entries);
-                spin_unlock_irqrestore(&iommu->lock, flags);
                 return -ENOMEM;
             }
         }
@@ -1108,6 +1096,7 @@ static int domain_context_mapping_one(
 
         context_set_address_root(*context, pgd_maddr);
         context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+        spin_unlock(&hd->mapping_lock);
     }
 
     /*
@@ -1119,8 +1108,7 @@ static int domain_context_mapping_one(
     context_set_fault_enable(*context);
     context_set_present(*context);
     iommu_flush_cache_entry(context);
-
-    unmap_vtd_domain_page(context_entries);
+    spin_unlock(&iommu->lock);
 
     /* Context entry was previously non-present (with domid 0). */
     if ( iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
@@ -1130,7 +1118,8 @@ static int domain_context_mapping_one(
         iommu_flush_iotlb_dsi(iommu, 0, 1);
 
     set_bit(iommu->index, &hd->iommu_bitmap);
-    spin_unlock_irqrestore(&iommu->lock, flags);
+
+    unmap_vtd_domain_page(context_entries);
 
     return 0;
 }
@@ -1174,17 +1163,15 @@ int pdev_type(u8 bus, u8 devfn)
 }
 
 #define MAX_BUSES 256
+static DEFINE_SPINLOCK(bus2bridge_lock);
 static struct { u8 map, bus, devfn; } bus2bridge[MAX_BUSES];
 
-static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
+static int _find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
 {
     int cnt = 0;
     *secbus = *bus;
 
-    if ( *bus == 0 )
-        /* assume integrated PCI devices in RC have valid requester-id */
-        return 1;
-
+    ASSERT(spin_is_locked(&bus2bridge_lock));
     if ( !bus2bridge[*bus].map )
         return 0;
 
@@ -1200,6 +1187,21 @@ static int find_pcie_endpoint(u8 *bus, u
     return 1;
 }
 
+static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
+{
+    int ret = 0;
+
+    if ( *bus == 0 )
+        /* assume integrated PCI devices in RC have valid requester-id */
+        return 1;
+
+    spin_lock(&bus2bridge_lock);
+    ret = _find_pcie_endpoint(bus, devfn, secbus);
+    spin_unlock(&bus2bridge_lock);
+
+    return ret;
+}
+
 static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn)
 {
     struct acpi_drhd_unit *drhd;
@@ -1211,6 +1213,8 @@ static int domain_context_mapping(struct
     drhd = acpi_find_matched_drhd_unit(bus, devfn);
     if ( !drhd )
         return -ENODEV;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
 
     type = pdev_type(bus, devfn);
     switch ( type )
@@ -1226,12 +1230,14 @@ static int domain_context_mapping(struct
         if ( type == DEV_TYPE_PCIe_BRIDGE )
             break;
 
+        spin_lock(&bus2bridge_lock);
         for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ )
         {
             bus2bridge[sec_bus].map = 1;
             bus2bridge[sec_bus].bus =  bus;
             bus2bridge[sec_bus].devfn =  devfn;
         }
+        spin_unlock(&bus2bridge_lock);
         break;
 
     case DEV_TYPE_PCIe_ENDPOINT:
@@ -1290,8 +1296,10 @@ static int domain_context_unmap_one(
     u8 bus, u8 devfn)
 {
     struct context_entry *context, *context_entries;
-    unsigned long flags;
     u64 maddr;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    spin_lock(&iommu->lock);
 
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
@@ -1299,11 +1307,11 @@ static int domain_context_unmap_one(
 
     if ( !context_present(*context) )
     {
+        spin_unlock(&iommu->lock);
         unmap_vtd_domain_page(context_entries);
         return 0;
     }
 
-    spin_lock_irqsave(&iommu->lock, flags);
     context_clear_present(*context);
     context_clear_entry(*context);
     iommu_flush_cache_entry(context);
@@ -1315,8 +1323,8 @@ static int domain_context_unmap_one(
     else
         iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
 
+    spin_unlock(&iommu->lock);
     unmap_vtd_domain_page(context_entries);
-    spin_unlock_irqrestore(&iommu->lock, flags);
 
     return 0;
 }
@@ -1380,7 +1388,10 @@ static int reassign_device_ownership(
     struct iommu *pdev_iommu;
     int ret, found = 0;
 
-    if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    pdev = pci_get_pdev_by_domain(source, bus, devfn);
+
+    if (!pdev)
         return -ENODEV;
 
     drhd = acpi_find_matched_drhd_unit(bus, devfn);
@@ -1391,14 +1402,9 @@ static int reassign_device_ownership(
     if ( ret )
         return ret;
 
-    write_lock(&pcidevs_lock);
     list_move(&pdev->domain_list, &target->arch.pdev_list);
-    write_unlock(&pcidevs_lock);
     pdev->domain = target;
 
-    spin_unlock(&pdev->lock);
-
-    read_lock(&pcidevs_lock);
     for_each_pdev ( source, pdev )
     {
         drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
@@ -1408,7 +1414,6 @@ static int reassign_device_ownership(
             break;
         }
     }
-    read_unlock(&pcidevs_lock);
 
     if ( !found )
         clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
@@ -1423,20 +1428,13 @@ void iommu_domain_teardown(struct domain
     if ( list_empty(&acpi_drhd_units) )
         return;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    spin_lock(&hd->mapping_lock);
     iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
     hd->pgd_maddr = 0;
+    spin_unlock(&hd->mapping_lock);
+
     iommu_domid_release(d);
-}
-
-static int domain_context_mapped(u8 bus, u8 devfn)
-{
-    struct acpi_drhd_unit *drhd;
-
-    for_each_drhd_unit ( drhd )
-        if ( device_context_mapped(drhd->iommu, bus, devfn) )
-            return 1;
-
-    return 0;
 }
 
 int intel_iommu_map_page(
@@ -1457,17 +1455,27 @@ int intel_iommu_map_page(
          ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
 
+    spin_lock(&hd->mapping_lock);
+
     pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
     if ( pg_maddr == 0 )
+    {
+        spin_unlock(&hd->mapping_lock);
         return -ENOMEM;
+    }
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     pte = page + (gfn & LEVEL_MASK);
     pte_present = dma_pte_present(*pte);
     dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
     iommu_flush_cache_entry(pte);
+    spin_unlock(&hd->mapping_lock);
     unmap_vtd_domain_page(page);
 
+    /*
+     * No need pcideves_lock here because we have flush
+     * when assign/deassign device
+     */
     for_each_drhd_unit ( drhd )
     {
         iommu = drhd->iommu;
@@ -1510,6 +1518,7 @@ static int iommu_prepare_rmrr_dev(struct
     u64 base, end;
     unsigned long base_pfn, end_pfn;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(rmrr->base_address < rmrr->end_address);
     
     base = rmrr->base_address & PAGE_MASK_4K;
@@ -1523,8 +1532,7 @@ static int iommu_prepare_rmrr_dev(struct
         base_pfn++;
     }
 
-    if ( domain_context_mapped(bus, devfn) == 0 )
-        ret = domain_context_mapping(d, bus, devfn);
+    ret = domain_context_mapping(d, bus, devfn);
 
     return ret;
 }
@@ -1534,6 +1542,8 @@ static int intel_iommu_add_device(struct
     struct acpi_rmrr_unit *rmrr;
     u16 bdf;
     int ret, i;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
 
     if ( !pdev->domain )
         return -EINVAL;
@@ -1689,6 +1699,7 @@ static void setup_dom0_rmrr(struct domai
     u16 bdf;
     int ret, i;
 
+    read_lock(&pcidevs_lock);
     for_each_rmrr_device ( rmrr, bdf, i )
     {
         ret = iommu_prepare_rmrr_dev(d, rmrr, PCI_BUS(bdf), PCI_DEVFN2(bdf));
@@ -1696,6 +1707,7 @@ static void setup_dom0_rmrr(struct domai
             gdprintk(XENLOG_ERR VTDPREFIX,
                      "IOMMU: mapping reserved region failed\n");
     }
+    read_unlock(&pcidevs_lock);
 }
 
 int intel_vtd_setup(void)
@@ -1748,27 +1760,43 @@ int device_assigned(u8 bus, u8 devfn)
 {
     struct pci_dev *pdev;
 
-    if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
-    {
-        spin_unlock(&pdev->lock);
-        return 0;
-    }
-
-    return 1;
+    read_lock(&pcidevs_lock);
+    pdev = pci_get_pdev_by_domain(dom0, bus, devfn);
+    if (!pdev)
+    {
+        read_unlock(&pcidevs_lock);
+        return -1;
+    }
+
+    read_unlock(&pcidevs_lock);
+    return 0;
 }
 
 int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
 {
     struct acpi_rmrr_unit *rmrr;
     int ret = 0, i;
+    struct pci_dev *pdev;
     u16 bdf;
 
     if ( list_empty(&acpi_drhd_units) )
         return -ENODEV;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    pdev = pci_get_pdev(bus, devfn);
+    if (!pdev)
+        return -ENODEV;
+
+    if (pdev->domain != dom0)
+    {
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                "IOMMU: assign a assigned device\n");
+       return -EBUSY;
+    }
+
     ret = reassign_device_ownership(dom0, d, bus, devfn);
     if ( ret )
-        return ret;
+        goto done;
 
     /* Setup rmrr identity mapping */
     for_each_rmrr_device( rmrr, bdf, i )
@@ -1779,16 +1807,20 @@ int intel_iommu_assign_device(struct dom
              * ignore USB RMRR temporarily.
              */
             if ( is_usb_device(bus, devfn) )
-                return 0;
+            {
+                ret = 0;
+                goto done;
+            }
 
             ret = iommu_prepare_rmrr_dev(d, rmrr, bus, devfn);
             if ( ret )
                 gdprintk(XENLOG_ERR VTDPREFIX,
                          "IOMMU: mapping reserved region failed\n");
-            return ret;
+            goto done; 
         }
     }
 
+done:
     return ret;
 }
 
diff -r c15244125a69 -r 2941b1a97c60 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/include/asm-x86/msi.h Thu Dec 11 11:48:19 2008 +0000
@@ -68,13 +68,17 @@ struct msi_msg {
        u32     data;           /* 16 bits of msi message data */
 };
 
+struct msi_desc;
 /* Helper functions */
 extern void mask_msi_vector(unsigned int vector);
 extern void unmask_msi_vector(unsigned int vector);
 extern void set_msi_affinity(unsigned int vector, cpumask_t mask);
-extern int pci_enable_msi(struct msi_info *msi);
-extern void pci_disable_msi(int vector);
+extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
+extern void pci_disable_msi(struct msi_desc *desc);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
+extern int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
+extern void teardown_msi_vector(int vector);
+extern int msi_free_vector(struct msi_desc *entry);
 
 struct msi_desc {
        struct {
diff -r c15244125a69 -r 2941b1a97c60 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/include/xen/iommu.h   Thu Dec 11 11:48:19 2008 +0000
@@ -62,7 +62,7 @@ void iommu_domain_destroy(struct domain 
 void iommu_domain_destroy(struct domain *d);
 int device_assigned(u8 bus, u8 devfn);
 int assign_device(struct domain *d, u8 bus, u8 devfn);
-void deassign_device(struct domain *d, u8 bus, u8 devfn);
+int deassign_device(struct domain *d, u8 bus, u8 devfn);
 int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
     XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
diff -r c15244125a69 -r 2941b1a97c60 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/include/xen/pci.h     Thu Dec 11 11:48:19 2008 +0000
@@ -36,7 +36,6 @@ struct pci_dev {
     struct domain *domain;
     const u8 bus;
     const u8 devfn;
-    spinlock_t lock;
 };
 
 #define for_each_pdev(domain, pdev) \
@@ -59,6 +58,8 @@ void pci_release_devices(struct domain *
 void pci_release_devices(struct domain *d);
 int pci_add_device(u8 bus, u8 devfn);
 int pci_remove_device(u8 bus, u8 devfn);
+struct pci_dev *pci_get_pdev(int bus, int devfn);
+struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
 
 uint8_t pci_conf_read8(
     unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);
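
For reference, a condensed sketch of the MSI teardown ordering that the
unmap_domain_pirq() hunk above introduces: the device-side disable runs
before irq_desc[vector].lock is taken, the irq_desc cleanup runs under
it, and the msi_desc and vector are freed only after it is dropped. This
is an excerpt-style sketch, not the complete function:

    /* Sketch only: assumes pcidevs_lock and d->event_lock are held. */
    msi_desc = desc->msi_desc;
    if ( msi_desc != NULL )
        pci_disable_msi(msi_desc);       /* touches device registers only */

    spin_lock_irqsave(&desc->lock, flags);
    if ( msi_desc )
        teardown_msi_vector(vector);     /* clears irq_desc[vector].msi_desc */
    if ( desc->handler == &pci_msi_type )
        desc->handler = &no_irq_type;
    spin_unlock_irqrestore(&desc->lock, flags);

    if ( msi_desc )
    {
        msi_free_vector(msi_desc);       /* unlinks and frees the msi_desc */
        free_irq_vector(vector);
    }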

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
