[Xen-changelog] [xen-3.2-testing] Handle DRHDs with different supported AGAWs.

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-3.2-testing] Handle DRHDs with different supported AGAWs.
From: "Xen patchbot-3.2-testing" <patchbot-3.2-testing@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 29 Jul 2008 19:00:26 -0700
Delivery-date: Tue, 29 Jul 2008 19:00:13 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1217344395 -3600
# Node ID f830b47149a4ccd7c0b0be0eaa41add653c90c36
# Parent  6de4320d71f9da9228a130e2b02bd855c1c53cf3
Handle DRHDs with different supported AGAWs.

This changeset is backported from xen-unstable.
Signed-off-by: Dexuan Cui <dexuan.cui@xxxxxxxxx>

The original description of the changeset is:

vtd: Various cleanups and fixes:
* Handle DRHDs with different supported AGAWs. To support this we
create page tables which always have 4 levels, and skip top levels
for units which support only 2 or 3 levels (see the first sketch
below).
* Handle systems with mixed DRHD support for cache snooping. We must
pessimistically CLFLUSH if any DRHD does not support snooping (see
the second sketch below).
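
As a minimal standalone sketch of the first point (illustrative C,
not part of the patch): a fixed 4-level table can serve a 2- or
3-level unit by walking past the unneeded top levels and programming
the context entry with the smaller AGAW. level_to_agaw() is the real
macro added in the intel-iommu.h hunk below; the rest of the
scaffolding is invented for this example.

    #include <stdio.h>

    /* Real macro from the intel-iommu.h hunk below; an AGAW value
     * encodes "number of pagetable levels minus two". */
    #define level_to_agaw(l) ((l) - 2)

    int main(void)
    {
        int nr_pt_levels;

        for ( nr_pt_levels = 2; nr_pt_levels <= 4; nr_pt_levels++ )
        {
            int skipped = 0;
            int agaw;

            /* Guest tables always have 4 levels; step down until the
             * remaining depth matches what this unit can walk.  Each
             * step stands in for dereferencing one top-level table. */
            for ( agaw = level_to_agaw(4);
                  agaw != level_to_agaw(nr_pt_levels);
                  agaw-- )
                skipped++;

            printf("%d-level unit: skip %d top level(s), context agaw %d\n",
                   nr_pt_levels, skipped, agaw);
        }
        return 0;
    }

Run, this prints that a 2-level unit skips two top levels (agaw 0),
a 3-level unit skips one (agaw 1), and a 4-level unit skips none
(agaw 2) -- the same walk domain_context_mapping_one() performs in
the hunk below.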
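
Likewise, a minimal sketch of the second point: coherency is treated
as a global property across all units, so a single non-snooping DRHD
forces a CLFLUSH after every pagetable write. The iommus_incoherent
flag and the per-unit coherency test mirror the patch; struct unit
and its array are made up for illustration.

    #include <stdio.h>

    /* Illustrative stand-in: 'coherent' plays the role of the ECAP
     * coherency bit that the real code reads via ecap_coherent(). */
    struct unit { const char *name; int coherent; };

    int main(void)
    {
        struct unit units[] = {
            { "drhd0", 1 },   /* snoops CPU caches */
            { "drhd1", 0 },   /* does not snoop */
        };
        int iommus_incoherent = 0;
        unsigned int i;

        /* One pass at init time, as iommu_alloc() does per unit: a
         * single non-snooping unit forces flushing for everyone. */
        for ( i = 0; i < sizeof(units) / sizeof(units[0]); i++ )
            if ( !units[i].coherent )
                iommus_incoherent = 1;

        printf("CLFLUSH after pagetable writes: %s\n",
               iommus_incoherent ? "yes (pessimistic)" : "no");
        return 0;
    }

Keeping the decision in one global flag, rather than per unit, is
what lets __iommu_flush_cache() in the patch drop its struct iommu
argument.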

Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
xen-unstable changeset: 17755:ecd266cebcab648132d432899eabaecf8a168508
xen-unstable date: Fri May 30 15:06:08 2008 +0100
---
 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    |  175 +++++++++++++++---------------
 xen/include/asm-x86/hvm/vmx/intel-iommu.h |    1 
 xen/include/asm-x86/iommu.h               |    1 
 3 files changed, 95 insertions(+), 82 deletions(-)

diff -r 6de4320d71f9 -r f830b47149a4 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c
--- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Fri Jul 25 15:04:26 2008 +0100
+++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Tue Jul 29 16:13:15 2008 +0100
@@ -79,24 +79,28 @@ static void iommu_domid_release(struct d
     }
 }
 
-unsigned int x86_clflush_size;
-void clflush_cache_range(void *adr, int size)
+static unsigned int x86_clflush_size;
+static int iommus_incoherent;
+static void __iommu_flush_cache(void *addr, int size)
 {
     int i;
+
+    if ( !iommus_incoherent )
+        return;
+
     for ( i = 0; i < size; i += x86_clflush_size )
-        clflush(adr + i);
-}
-
-static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
-{
-    if ( !ecap_coherent(iommu->ecap) )
-        clflush_cache_range(addr, size);
-}
-
-#define iommu_flush_cache_entry(iommu, addr) \
-       __iommu_flush_cache(iommu, addr, 8)
-#define iommu_flush_cache_page(iommu, addr) \
-       __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
+        clflush((char*)addr + i);
+}
+
+void iommu_flush_cache_entry(void *addr)
+{
+    __iommu_flush_cache(addr, 8);
+}
+
+void iommu_flush_cache_page(void *addr)
+{
+    __iommu_flush_cache(addr, PAGE_SIZE_4K);
+}
 
 int nr_iommus;
 /* context entry handling */
@@ -119,11 +123,11 @@ static struct context_entry * device_to_
             return NULL;
         }
         memset((void *) phy_addr, 0, PAGE_SIZE);
-        iommu_flush_cache_page(iommu, (void *)phy_addr);
+        iommu_flush_cache_page((void *)phy_addr);
         phy_addr = virt_to_maddr((void *)phy_addr);
         set_root_value(*root, phy_addr);
         set_root_present(*root);
-        iommu_flush_cache_entry(iommu, root);
+        iommu_flush_cache_entry(root);
     }
     phy_addr = (unsigned long) get_context_addr(*root);
     context = (struct context_entry *)maddr_to_virt(phy_addr);
@@ -157,8 +161,6 @@ static struct page_info *addr_to_dma_pag
 static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
     struct dma_pte *parent, *pte = NULL, *pgd;
     int level = agaw_to_level(hd->agaw);
@@ -166,9 +168,6 @@ static struct page_info *addr_to_dma_pag
     unsigned long flags;
     struct page_info *pg = NULL;
     u64 *vaddr = NULL;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
 
     addr &= (((u64)1) << addr_width) - 1;
     spin_lock_irqsave(&hd->mapping_lock, flags);
@@ -200,7 +199,7 @@ static struct page_info *addr_to_dma_pag
                 return NULL;
             }
             memset(vaddr, 0, PAGE_SIZE);
-            iommu_flush_cache_page(iommu, vaddr);
+            iommu_flush_cache_page(vaddr);
 
             dma_set_pte_addr(*pte, page_to_maddr(pg));
 
@@ -210,7 +209,7 @@ static struct page_info *addr_to_dma_pag
              */
             dma_set_pte_readable(*pte);
             dma_set_pte_writable(*pte);
-            iommu_flush_cache_entry(iommu, pte);
+            iommu_flush_cache_entry(pte);
         }
         else
         {
@@ -549,8 +548,6 @@ static void dma_pte_clear_one(struct dom
     struct dma_pte *pte = NULL;
     struct page_info *pg = NULL;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-
     /* get last level pte */
     pg = dma_addr_level_page(domain, addr, 1);
     if ( !pg )
@@ -560,7 +557,7 @@ static void dma_pte_clear_one(struct dom
     if ( pte )
     {
         dma_clear_pte(*pte);
-        iommu_flush_cache_entry(drhd->iommu, pte);
+        iommu_flush_cache_entry(pte);
 
         for_each_drhd_unit ( drhd )
         {
@@ -602,18 +599,13 @@ static void dma_pte_clear_range(struct d
 /* free page table pages. last level pte should already be cleared */
 void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
 {
-    struct acpi_drhd_unit *drhd;
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
     struct dma_pte *pte;
     int total = agaw_to_level(hd->agaw);
     int level;
     u64 tmp;
     struct page_info *pg = NULL;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
 
     start &= (((u64)1) << addr_width) - 1;
     end &= (((u64)1) << addr_width) - 1;
@@ -637,7 +629,7 @@ void dma_pte_free_pagetable(struct domai
             pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
             pte += address_level_offset(tmp, level);
             dma_clear_pte(*pte);
-            iommu_flush_cache_entry(iommu, pte);
+            iommu_flush_cache_entry(pte);
             unmap_domain_page(pte);
             free_domheap_page(pg);
 
@@ -677,7 +669,7 @@ static int iommu_set_root_entry(struct i
             return -ENOMEM;
 
         memset((u8*)root, 0, PAGE_SIZE);
-        iommu_flush_cache_page(iommu, root);
+        iommu_flush_cache_page(root);
 
         if ( cmpxchg((unsigned long *)&iommu->root_entry,
                      0, (unsigned long)root) != 0 )
@@ -963,6 +955,8 @@ struct iommu *iommu_alloc(void *hw_data)
 {
     struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
     struct iommu *iommu;
+    unsigned long sagaw;
+    int agaw;
 
     if ( nr_iommus > MAX_IOMMUS )
     {
@@ -991,6 +985,23 @@ struct iommu *iommu_alloc(void *hw_data)
 
     iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
     iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
+
+    /* Calculate number of pagetable levels: between 2 and 4. */
+    sagaw = cap_sagaw(iommu->cap);
+    for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
+        if ( test_bit(agaw, &sagaw) )
+            break;
+    if ( agaw < 0 )
+    {
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                 "IOMMU: unsupported sagaw %lx\n", sagaw);
+        xfree(iommu);
+        return NULL;
+    }
+    iommu->nr_pt_levels = agaw_to_level(agaw);
+
+    if ( !ecap_coherent(iommu->ecap) )
+        iommus_incoherent = 1;
 
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
@@ -1025,9 +1036,6 @@ int iommu_domain_init(struct domain *dom
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct iommu *iommu = NULL;
-    int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
-    int adjust_width, agaw;
-    unsigned long sagaw;
     struct acpi_drhd_unit *drhd;
 
     spin_lock_init(&hd->mapping_lock);
@@ -1041,22 +1049,7 @@ int iommu_domain_init(struct domain *dom
     for_each_drhd_unit ( drhd )
         iommu = drhd->iommu ? : iommu_alloc(drhd);
 
-    /* calculate AGAW */
-    if (guest_width > cap_mgaw(iommu->cap))
-        guest_width = cap_mgaw(iommu->cap);
-    adjust_width = guestwidth_to_adjustwidth(guest_width);
-    agaw = width_to_agaw(adjust_width);
-    /* FIXME: hardware doesn't support it, choose a bigger one? */
-    sagaw = cap_sagaw(iommu->cap);
-    if ( !test_bit(agaw, &sagaw) )
-    {
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "IOMMU: hardware doesn't support the agaw\n");
-        agaw = find_next_bit(&sagaw, 5, agaw);
-        if ( agaw >= 5 )
-            return -ENODEV;
-    }
-    hd->agaw = agaw;
+    hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
     return 0;
 }
 
@@ -1069,6 +1062,8 @@ static int domain_context_mapping_one(
     struct context_entry *context;
     unsigned long flags;
     int ret = 0;
+    u64 pgd_maddr;
+    int agaw = -1;
 
     context = device_to_context_entry(iommu, bus, devfn);
     if ( !context )
@@ -1089,36 +1084,54 @@ static int domain_context_mapping_one(
     }
 
     spin_lock_irqsave(&iommu->lock, flags);
+
+    if ( ecap_pass_thru(iommu->ecap) )
+        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+    else
+    {
+        /* Ensure we have pagetables allocated down to leaf PTE. */
+        if ( !hd->pgd )
+        {
+            addr_to_dma_page(domain, 0);
+            if ( !hd->pgd )
+            {
+            nomem:
+                spin_unlock_irqrestore(&hd->mapping_lock, flags);
+                return -ENOMEM;
+            }
+        }
+ 
+        /* Skip top levels of page tables for 2- and 3-level DRHDs. */
+        pgd_maddr = virt_to_maddr(hd->pgd);
+        for ( agaw = level_to_agaw(4);
+              agaw != level_to_agaw(iommu->nr_pt_levels);
+              agaw-- )
+        {
+            if ( agaw == level_to_agaw(4) )
+                pgd_maddr = dma_pte_addr(*hd->pgd);
+            else
+            {
+                struct dma_pte *p = map_domain_page(pgd_maddr);
+                pgd_maddr = dma_pte_addr(*p);
+                unmap_domain_page(p);
+                if ( pgd_maddr == 0 )
+                    goto nomem;
+            } 
+        }
+        context_set_address_root(*context, pgd_maddr);
+        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+    }
+
     /*
      * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
      * be 1 based as required by intel's iommu hw.
      */
+    BUG_ON(agaw == -1);
     context_set_domain_id(context, domain);
-    context_set_address_width(*context, hd->agaw);
-
-    if ( ecap_pass_thru(iommu->ecap) )
-        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
-    else
-    {
-        if ( !hd->pgd )
-        {
-            struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
-            if ( !pgd )
-            {
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return -ENOMEM;
-            }
-            memset(pgd, 0, PAGE_SIZE);
-            hd->pgd = pgd;
-        }
- 
-        context_set_address_root(*context, virt_to_maddr(hd->pgd));
-        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
-    }
-
+    context_set_address_width(*context, agaw);
     context_set_fault_enable(*context);
     context_set_present(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
 
     gdprintk(XENLOG_INFO VTDPREFIX,
              "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
@@ -1315,7 +1328,7 @@ static int domain_context_unmap_one(
     spin_lock_irqsave(&iommu->lock, flags);
     context_clear_present(*context);
     context_clear_entry(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
     iommu_flush_context_global(iommu, 0);
     iommu_flush_iotlb_global(iommu, 0);
     spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1558,7 +1571,7 @@ int iommu_map_page(struct domain *d, pad
     pte += gfn & LEVEL_MASK;
     dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
-    iommu_flush_cache_entry(iommu, pte);
+    iommu_flush_cache_entry(pte);
     unmap_domain_page(pte);
 
     for_each_drhd_unit ( drhd )
@@ -1606,8 +1619,6 @@ int iommu_page_mapping(struct domain *do
     int index;
     struct page_info *pg = NULL;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
     if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
         return -EINVAL;
     iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
@@ -1624,7 +1635,7 @@ int iommu_page_mapping(struct domain *do
         pte += start_pfn & LEVEL_MASK;
         dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
-        iommu_flush_cache_entry(iommu, pte);
+        iommu_flush_cache_entry(pte);
         unmap_domain_page(pte);
         start_pfn++;
         index++;
@@ -1675,7 +1686,7 @@ void iommu_flush(struct domain *d, dma_a
             iommu_flush_write_buffer(iommu);
     }
 
-    iommu_flush_cache_entry(iommu, pte);
+    iommu_flush_cache_entry(pte);
 }
 
 static int iommu_prepare_rmrr_dev(
diff -r 6de4320d71f9 -r f830b47149a4 xen/include/asm-x86/hvm/vmx/intel-iommu.h
--- a/xen/include/asm-x86/hvm/vmx/intel-iommu.h Fri Jul 25 15:04:26 2008 +0100
+++ b/xen/include/asm-x86/hvm/vmx/intel-iommu.h Tue Jul 29 16:13:15 2008 +0100
@@ -232,6 +232,7 @@ struct context_entry {
 /* page table handling */
 #define LEVEL_STRIDE       (9)
 #define LEVEL_MASK         ((1 << LEVEL_STRIDE) - 1)
+#define level_to_agaw(val) ((val) - 2)
 #define agaw_to_level(val) ((val) + 2)
 #define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
 #define width_to_agaw(w)   ((w - 30)/LEVEL_STRIDE)
diff -r 6de4320d71f9 -r f830b47149a4 xen/include/asm-x86/iommu.h
--- a/xen/include/asm-x86/iommu.h       Fri Jul 25 15:04:26 2008 +0100
+++ b/xen/include/asm-x86/iommu.h       Tue Jul 29 16:13:15 2008 +0100
@@ -56,6 +56,7 @@ struct iommu {
     void __iomem *reg; /* Pointer to hardware regs, virtual addr */
     u32        index;         /* Sequence number of iommu */
     u32        gcmd;          /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+    u32        nr_pt_levels;
     u64        cap;
     u64        ecap;
     spinlock_t lock; /* protect context, domain ids */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
