[Xen-changelog] [xen-unstable] VT-d: Allocates page table pgd, root_entry, iremap and qinval from domheap rather than xenheap

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] VT-d: Allocates page table pgd, root_entry, iremap and qinval from
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 10 Apr 2008 04:10:12 -0700
Delivery-date: Thu, 10 Apr 2008 04:10:26 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1207815758 -3600
# Node ID 1d3aaa6a8b870805e16dcf162223fb2edd9de26d
# Parent  85848be18ba22814bddeb82a4cfc99e14447cab1
VT-d: Allocates page table pgd, root_entry, iremap and qinval from
domheap rather than xenheap, and gets rid of structure page_info in
iommu.c.

Signed-off-by: Weidong Han <weidong.han@xxxxxxxxx>
---
 xen/drivers/passthrough/vtd/intremap.c |   51 ++-
 xen/drivers/passthrough/vtd/iommu.c    |  458 +++++++++------------------------
 xen/drivers/passthrough/vtd/iommu.h    |    4 
 xen/drivers/passthrough/vtd/qinval.c   |   94 +++---
 xen/drivers/passthrough/vtd/utils.c    |   40 +-
 xen/drivers/passthrough/vtd/x86/vtd.c  |  177 ++++++++++++
 xen/include/xen/hvm/iommu.h            |    2 
 xen/include/xen/iommu.h                |    3 
 8 files changed, 421 insertions(+), 408 deletions(-)
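
A note on the pattern (illustrative only, not part of the changeset): every
hunk below applies the same discipline.  Instead of keeping a long-lived
xenheap pointer (hd->pgd, iommu->root_entry, ir_ctrl->iremap,
qi_ctrl->qinval), the code now stores only the page's machine address and
maps it transiently around each access with
map_vtd_domain_page()/unmap_vtd_domain_page(), allocating through
alloc_pgtable_maddr() and freeing through free_pgtable_maddr().  The
standalone C sketch below models that access pattern; alloc_table(),
map_table() and unmap_table() are hypothetical stand-ins for the Xen
helpers (here backed by calloc() so the example compiles on its own).

/* Sketch only: models the maddr + transient-mapping discipline used below. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE_4K 4096

/* Stand-in for alloc_pgtable_maddr(): return a zeroed page's machine address. */
static uint64_t alloc_table(void)
{
    void *page = calloc(1, PAGE_SIZE_4K);
    return (uint64_t)(uintptr_t)page;   /* 0 on failure, like the real helper */
}

/* Stand-ins for map_vtd_domain_page()/unmap_vtd_domain_page(). */
static void *map_table(uint64_t maddr)
{
    return (void *)(uintptr_t)maddr;    /* real code creates a temporary mapping */
}

static void unmap_table(void *vaddr)
{
    (void)vaddr;                        /* real code tears the mapping down */
}

struct iremap_entry { uint64_t lo, hi; };

int main(void)
{
    /* Driver state is just the machine address, no long-lived pointer. */
    uint64_t iremap_maddr = alloc_table();
    if ( iremap_maddr == 0 )
        return 1;

    /* Each access maps the page, indexes into it, then unmaps it again. */
    struct iremap_entry *entries = map_table(iremap_maddr);
    entries[0].lo = 1;                  /* e.g. mark the entry present */
    unmap_table(entries);

    printf("entry 0 written through a transient mapping\n");
    free((void *)(uintptr_t)iremap_maddr);
    return 0;
}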

diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/intremap.c    Thu Apr 10 09:22:38 2008 +0100
@@ -45,7 +45,7 @@ static void remap_entry_to_ioapic_rte(
 static void remap_entry_to_ioapic_rte(
     struct iommu *iommu, struct IO_APIC_route_entry *old_rte)
 {
-    struct iremap_entry *iremap_entry = NULL;
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct IO_APIC_route_remap_entry *remap_rte;
     unsigned int index;
     unsigned long flags;
@@ -70,7 +70,9 @@ static void remap_entry_to_ioapic_rte(
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
-    iremap_entry = &ir_ctrl->iremap[index];
+    iremap_entries =
+        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+    iremap_entry = &iremap_entries[index];
 
     old_rte->vector = iremap_entry->lo.vector;
     old_rte->delivery_mode = iremap_entry->lo.dlm;
@@ -80,13 +82,14 @@ static void remap_entry_to_ioapic_rte(
     old_rte->dest.logical.__reserved_1 = 0;
     old_rte->dest.logical.logical_dest = iremap_entry->lo.dst;
 
+    unmap_vtd_domain_page(iremap_entries);
     spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
 }
 
 static void ioapic_rte_to_remap_entry(struct iommu *iommu,
     int apic_id, struct IO_APIC_route_entry *old_rte)
 {
-    struct iremap_entry *iremap_entry = NULL;
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct IO_APIC_route_remap_entry *remap_rte;
     unsigned int index;
     unsigned long flags;
@@ -103,7 +106,10 @@ static void ioapic_rte_to_remap_entry(st
         goto out;
     }
 
-    iremap_entry = &(ir_ctrl->iremap[index]);
+    iremap_entries =
+        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+    iremap_entry = &iremap_entries[index];
+
     if ( *(u64 *)iremap_entry != 0 )
         dprintk(XENLOG_WARNING VTDPREFIX,
                "Interrupt remapping entry is in use already!\n");
@@ -124,12 +130,13 @@ static void ioapic_rte_to_remap_entry(st
     iremap_entry->lo.p = 1;    /* finally, set present bit */
     ir_ctrl->iremap_index++;
 
+    unmap_vtd_domain_page(iremap_entries);
     iommu_flush_iec_index(iommu, 0, index);
     ret = invalidate_sync(iommu);
 
-    /* now construct new ioapic rte entry */ 
+    /* now construct new ioapic rte entry */
     remap_rte->vector = old_rte->vector;
-    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */ 
+    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
     remap_rte->index_15 = index & 0x8000;
     remap_rte->index_0_14 = index & 0x7fff;
     remap_rte->delivery_status = old_rte->delivery_status;
@@ -154,7 +161,7 @@ io_apic_read_remap_rte(
     struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) )
+    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
     {
         *IO_APIC_BASE(apic) = reg;
         return *(IO_APIC_BASE(apic)+4);
@@ -200,7 +207,7 @@ io_apic_write_remap_rte(
     struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) )
+    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
     {
         *IO_APIC_BASE(apic) = reg;
         *(IO_APIC_BASE(apic)+4) = value;
@@ -238,32 +245,30 @@ int intremap_setup(struct iommu *iommu)
 {
     struct ir_ctrl *ir_ctrl;
     unsigned long start_time;
-    u64 paddr;
 
     if ( !ecap_intr_remap(iommu->ecap) )
         return -ENODEV;
 
     ir_ctrl = iommu_ir_ctrl(iommu);
-    if ( ir_ctrl->iremap == NULL )
-    {
-        ir_ctrl->iremap = alloc_xenheap_page();
-        if ( ir_ctrl->iremap == NULL )
+    if ( ir_ctrl->iremap_maddr == 0 )
+    {
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr();
+        if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
-                    "Cannot allocate memory for ir_ctrl->iremap\n");
-            return -ENODEV;
-        }
-        memset(ir_ctrl->iremap, 0, PAGE_SIZE);
-    }
-
-    paddr = virt_to_maddr(ir_ctrl->iremap);
+                    "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
+            return -ENODEV;
+        }
+    }
+
 #if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
     /* set extended interrupt mode bit */
-    paddr |= ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
+    ir_ctrl->iremap_maddr |=
+            ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
 #endif
     /* size field = 256 entries per 4K page = 8 - 1 */
-    paddr |= 7;
-    dmar_writeq(iommu->reg, DMAR_IRTA_REG, paddr);
+    ir_ctrl->iremap_maddr |= 7;
+    dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);
 
     /* set SIRTP */
     iommu->gcmd |= DMA_GCMD_SIRTP;
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Thu Apr 10 09:22:38 2008 +0100
@@ -185,71 +185,70 @@ void iommu_flush_cache_page(struct iommu
 
 int nr_iommus;
 /* context entry handling */
-static struct context_entry * device_to_context_entry(struct iommu *iommu,
-                                                      u8 bus, u8 devfn)
-{
-    struct root_entry *root;
-    struct context_entry *context;
-    unsigned long phy_addr;
+static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
+{
+    struct root_entry *root, *root_entries;
     unsigned long flags;
+    u64 maddr;
 
     spin_lock_irqsave(&iommu->lock, flags);
-    root = &iommu->root_entry[bus];
+    root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+    root = &root_entries[bus];
     if ( !root_present(*root) )
     {
-        phy_addr = (unsigned long) alloc_xenheap_page();
-        if ( !phy_addr )
+        maddr = alloc_pgtable_maddr();
+        if ( maddr == 0 )
         {
             spin_unlock_irqrestore(&iommu->lock, flags);
-            return NULL;
+            return 0;
         }
-        memset((void *) phy_addr, 0, PAGE_SIZE);
-        iommu_flush_cache_page(iommu, (void *)phy_addr);
-        phy_addr = virt_to_maddr((void *)phy_addr);
-        set_root_value(*root, phy_addr);
+        set_root_value(*root, maddr);
         set_root_present(*root);
         iommu_flush_cache_entry(iommu, root);
     }
-    phy_addr = (unsigned long) get_context_addr(*root);
-    context = (struct context_entry *)maddr_to_virt(phy_addr);
+    maddr = (u64) get_context_addr(*root);
+    unmap_vtd_domain_page(root_entries);
     spin_unlock_irqrestore(&iommu->lock, flags);
-    return &context[devfn];
+    return maddr;
 }
 
 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
 {
-    struct root_entry *root;
+    struct root_entry *root, *root_entries;
     struct context_entry *context;
-    unsigned long phy_addr;
+    u64 context_maddr;
     int ret;
     unsigned long flags;
 
     spin_lock_irqsave(&iommu->lock, flags);
-    root = &iommu->root_entry[bus];
+    root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+    root = &root_entries[bus];
     if ( !root_present(*root) )
     {
         ret = 0;
         goto out;
     }
-    phy_addr = get_context_addr(*root);
-    context = (struct context_entry *)maddr_to_virt(phy_addr);
+    context_maddr = get_context_addr(*root);
+    context = (struct context_entry *)map_vtd_domain_page(context_maddr);
     ret = context_present(context[devfn]);
+    unmap_vtd_domain_page(context);
  out:
+    unmap_vtd_domain_page(root_entries);
     spin_unlock_irqrestore(&iommu->lock, flags);
     return ret;
 }
 
-static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
+static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
-    struct dma_pte *parent, *pte = NULL, *pgd;
+    struct dma_pte *parent, *pte = NULL;
     int level = agaw_to_level(hd->agaw);
     int offset;
     unsigned long flags;
-    struct page_info *pg = NULL;
+    u64 pte_maddr = 0;
     u64 *vaddr = NULL;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
@@ -257,19 +256,14 @@ static struct page_info *addr_to_dma_pag
 
     addr &= (((u64)1) << addr_width) - 1;
     spin_lock_irqsave(&hd->mapping_lock, flags);
-    if ( !hd->pgd )
-    {
-        pgd = (struct dma_pte *)alloc_xenheap_page();
-        if ( !pgd )
-        {
-            spin_unlock_irqrestore(&hd->mapping_lock, flags);
-            return NULL;
-        }
-        memset(pgd, 0, PAGE_SIZE);
-        hd->pgd = pgd;
-    }
-
-    parent = hd->pgd;
+    if ( hd->pgd_maddr == 0 )
+    {
+        hd->pgd_maddr = alloc_pgtable_maddr();
+        if ( hd->pgd_maddr == 0 )
+            return 0;
+    }
+
+    parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
     while ( level > 1 )
     {
         offset = address_level_offset(addr, level);
@@ -277,18 +271,15 @@ static struct page_info *addr_to_dma_pag
 
         if ( dma_pte_addr(*pte) == 0 )
         {
-            pg = alloc_domheap_page(
-                NULL, MEMF_node(domain_to_node(domain)));
-            vaddr = map_domain_page(page_to_mfn(pg));
+            u64 maddr = alloc_pgtable_maddr();
+            dma_set_pte_addr(*pte, maddr);
+            vaddr = map_vtd_domain_page(maddr);
             if ( !vaddr )
             {
+                unmap_vtd_domain_page(parent);
                 spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return NULL;
+                return 0;
             }
-            memset(vaddr, 0, PAGE_SIZE);
-            iommu_flush_cache_page(iommu, vaddr);
-
-            dma_set_pte_addr(*pte, page_to_maddr(pg));
 
             /*
              * high level table always sets r/w, last level
@@ -300,21 +291,20 @@ static struct page_info *addr_to_dma_pag
         }
         else
         {
-            pg = maddr_to_page(pte->val);
-            vaddr = map_domain_page(page_to_mfn(pg));
+            vaddr = map_vtd_domain_page(pte->val);
             if ( !vaddr )
             {
+                unmap_vtd_domain_page(parent);
                 spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return NULL;
+                return 0;
             }
         }
 
-        if ( parent != hd->pgd )
-            unmap_domain_page(parent);
-
-        if ( level == 2 && vaddr )
+        unmap_vtd_domain_page(parent);
+        if ( level == 2 )
         {
-            unmap_domain_page(vaddr);
+            pte_maddr = pte->val & PAGE_MASK_4K;
+            unmap_vtd_domain_page(vaddr);
             break;
         }
 
@@ -324,43 +314,42 @@ static struct page_info *addr_to_dma_pag
     }
 
     spin_unlock_irqrestore(&hd->mapping_lock, flags);
-    return pg;
+    return pte_maddr;
 }
 
 /* return address's page at specific level */
-static struct page_info *dma_addr_level_page(struct domain *domain,
-                                             u64 addr, int level)
+static u64 dma_addr_level_page_maddr(
+    struct domain *domain, u64 addr, int level)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct dma_pte *parent, *pte = NULL;
     int total = agaw_to_level(hd->agaw);
     int offset;
-    struct page_info *pg = NULL;
-
-    parent = hd->pgd;
+    u64 pg_maddr = hd->pgd_maddr;
+
+    if ( pg_maddr == 0 )
+        return 0;
+
+    parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     while ( level <= total )
     {
         offset = address_level_offset(addr, total);
         pte = &parent[offset];
         if ( dma_pte_addr(*pte) == 0 )
-        {
-            if ( parent != hd->pgd )
-                unmap_domain_page(parent);
             break;
-        }
-
-        pg = maddr_to_page(pte->val);
-        if ( parent != hd->pgd )
-            unmap_domain_page(parent);
+
+        pg_maddr = pte->val & PAGE_MASK_4K;
+        unmap_vtd_domain_page(parent);
 
         if ( level == total )
-            return pg;
-
-        parent = map_domain_page(page_to_mfn(pg));
+            return pg_maddr;
+
+        parent = map_vtd_domain_page(pte->val);
         total--;
     }
 
-    return NULL;
+    unmap_vtd_domain_page(parent);
+    return 0;
 }
 
 static void iommu_flush_write_buffer(struct iommu *iommu)
@@ -639,17 +628,17 @@ static void dma_pte_clear_one(struct dom
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
-    struct dma_pte *pte = NULL;
-    struct page_info *pg = NULL;
+    struct dma_pte *page = NULL, *pte = NULL;
+    u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 
     /* get last level pte */
-    pg = dma_addr_level_page(domain, addr, 1);
-    if ( !pg )
+    pg_maddr = dma_addr_level_page_maddr(domain, addr, 1);
+    if ( pg_maddr == 0 )
         return;
-    pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
-    pte += address_level_offset(addr, 1);
+    page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+    pte = page + address_level_offset(addr, 1);
     if ( pte )
     {
         dma_clear_pte(*pte);
@@ -665,7 +654,7 @@ static void dma_pte_clear_one(struct dom
                 iommu_flush_write_buffer(iommu);
         }
     }
-    unmap_domain_page(pte);
+    unmap_vtd_domain_page(page);
 }
 
 /* clear last level pte, a tlb flush should be followed */
@@ -695,11 +684,11 @@ void dma_pte_free_pagetable(struct domai
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
-    struct dma_pte *pte;
+    struct dma_pte *page, *pte;
     int total = agaw_to_level(hd->agaw);
     int level;
     u32 tmp;
-    struct page_info *pg = NULL;
+    u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -717,15 +706,15 @@ void dma_pte_free_pagetable(struct domai
 
         while ( tmp < end )
         {
-            pg = dma_addr_level_page(domain, tmp, level);
-            if ( !pg )
+            pg_maddr = dma_addr_level_page_maddr(domain, tmp, level);
+            if ( pg_maddr == 0 )
                 return;
-            pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
-            pte += address_level_offset(tmp, level);
+            page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+            pte = page + address_level_offset(tmp, level);
             dma_clear_pte(*pte);
             iommu_flush_cache_entry(iommu, pte);
-            unmap_domain_page(pte);
-            free_domheap_page(pg);
+            unmap_vtd_domain_page(page);
+            free_pgtable_maddr(pg_maddr);
 
             tmp += level_size(level);
         }
@@ -735,17 +724,15 @@ void dma_pte_free_pagetable(struct domai
     /* free pgd */
     if ( start == 0 && end == ((((u64)1) << addr_width) - 1) )
     {
-        free_xenheap_page((void *)hd->pgd);
-        hd->pgd = NULL;
+        free_pgtable_maddr(hd->pgd_maddr);
+        hd->pgd_maddr = 0;
     }
 }
 
 /* iommu handling */
 static int iommu_set_root_entry(struct iommu *iommu)
 {
-    void *addr;
     u32 cmd, sts;
-    struct root_entry *root;
     unsigned long flags;
 
     if ( iommu == NULL )
@@ -755,25 +742,19 @@ static int iommu_set_root_entry(struct i
         return -EINVAL;
     }
 
-    if ( unlikely(!iommu->root_entry) )
-    {
-        root = (struct root_entry *)alloc_xenheap_page();
-        if ( root == NULL )
-            return -ENOMEM;
-
-        memset((u8*)root, 0, PAGE_SIZE);
-        iommu_flush_cache_page(iommu, root);
-
-        if ( cmpxchg((unsigned long *)&iommu->root_entry,
-                     0, (unsigned long)root) != 0 )
-            free_xenheap_page((void *)root);
-    }
-
-    addr = iommu->root_entry;
+    if ( iommu->root_maddr != 0 )
+    {
+        free_pgtable_maddr(iommu->root_maddr);
+        iommu->root_maddr = 0;
+    }
 
     spin_lock_irqsave(&iommu->register_lock, flags);
 
-    dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
+    iommu->root_maddr = alloc_pgtable_maddr();
+    if ( iommu->root_maddr == 0 )
+        return -ENOMEM;
+
+    dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
     cmd = iommu->gcmd | DMA_GCMD_SRTP;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
 
@@ -1110,8 +1091,11 @@ static void free_iommu(struct iommu *iom
 {
     if ( !iommu )
         return;
-    if ( iommu->root_entry )
-        free_xenheap_page((void *)iommu->root_entry);
+    if ( iommu->root_maddr != 0 )
+    {
+        free_pgtable_maddr(iommu->root_maddr);
+        iommu->root_maddr = 0;
+    }
     if ( iommu->reg )
         iounmap(iommu->reg);
     free_intel_iommu(iommu->intel);
@@ -1166,13 +1150,17 @@ static int domain_context_mapping_one(
     u8 bus, u8 devfn)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct context_entry *context;
+    struct context_entry *context, *context_entries;
     unsigned long flags;
     int ret = 0;
-
-    context = device_to_context_entry(iommu, bus, devfn);
+    u64 maddr;
+
+    maddr = bus_to_context_maddr(iommu, bus);
+    context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
+    context = &context_entries[devfn];
     if ( !context )
     {
+        unmap_vtd_domain_page(context_entries);
         gdprintk(XENLOG_ERR VTDPREFIX,
                  "domain_context_mapping_one:context == NULL:"
                  "bdf = %x:%x:%x\n",
@@ -1182,6 +1170,7 @@ static int domain_context_mapping_one(
 
     if ( context_present(*context) )
     {
+        unmap_vtd_domain_page(context_entries);
         gdprintk(XENLOG_WARNING VTDPREFIX,
                  "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1202,19 +1191,8 @@ static int domain_context_mapping_one(
     else
     {
 #endif
-        if ( !hd->pgd )
-        {
-            struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
-            if ( !pgd )
-            {
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return -ENOMEM;
-            }
-            memset(pgd, 0, PAGE_SIZE);
-            hd->pgd = pgd;
-        }
- 
-        context_set_address_root(*context, virt_to_maddr(hd->pgd));
+        ASSERT(hd->pgd_maddr != 0);
+        context_set_address_root(*context, hd->pgd_maddr);
         context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
 #ifdef CONTEXT_PASSTHRU
     }
@@ -1226,9 +1204,11 @@ static int domain_context_mapping_one(
 
     gdprintk(XENLOG_INFO VTDPREFIX,
              "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
-             " hd->pgd=%p\n",
+             " hd->pgd_maddr=%"PRIx64"\n",
              bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-             context->hi, context->lo, hd->pgd);
+             context->hi, context->lo, hd->pgd_maddr);
+
+    unmap_vtd_domain_page(context_entries);
 
     if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
                                     (((u16)bus) << 8) | devfn,
@@ -1389,12 +1369,16 @@ static int domain_context_unmap_one(
     struct iommu *iommu,
     u8 bus, u8 devfn)
 {
-    struct context_entry *context;
+    struct context_entry *context, *context_entries;
     unsigned long flags;
-
-    context = device_to_context_entry(iommu, bus, devfn);
+    u64 maddr;
+
+    maddr = bus_to_context_maddr(iommu, bus);
+    context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
+    context = &context_entries[devfn];
     if ( !context )
     {
+        unmap_vtd_domain_page(context_entries);
         gdprintk(XENLOG_ERR VTDPREFIX,
                  "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1403,6 +1387,7 @@ static int domain_context_unmap_one(
 
     if ( !context_present(*context) )
     {
+        unmap_vtd_domain_page(context_entries);
         gdprintk(XENLOG_WARNING VTDPREFIX,
                  "domain_context_unmap_one-%x:%x:%x- "
                  "context NOT present:return\n",
@@ -1420,6 +1405,7 @@ static int domain_context_unmap_one(
     iommu_flush_cache_entry(iommu, context);
     iommu_flush_context_global(iommu, 0);
     iommu_flush_iotlb_global(iommu, 0);
+    unmap_vtd_domain_page(context_entries);
     spin_unlock_irqrestore(&iommu->lock, flags);
 
     return 0;
@@ -1575,36 +1561,7 @@ void iommu_domain_teardown(struct domain
         return;
 
     iommu_domid_release(d);
-
-#if CONFIG_PAGING_LEVELS == 3
-    {
-        struct hvm_iommu *hd  = domain_hvm_iommu(d);
-        int level = agaw_to_level(hd->agaw);
-        struct dma_pte *pgd = NULL;
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            if ( hd->pgd )
-                free_xenheap_page((void *)hd->pgd);
-            break;
-        case VTD_PAGE_TABLE_LEVEL_4:
-            if ( hd->pgd )
-            {
-                pgd = hd->pgd;
-                if ( pgd[0].val != 0 )
-                    free_xenheap_page((void*)maddr_to_virt(
-                        dma_pte_addr(pgd[0])));
-                free_xenheap_page((void *)hd->pgd);
-            }
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "Unsupported p2m table sharing level!\n");
-            break;
-        }
-    }
-#endif
+    iommu_free_pgd(d);
     return_devices_to_dom0(d);
 }
 
@@ -1630,8 +1587,8 @@ int intel_iommu_map_page(
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
-    struct dma_pte *pte = NULL;
-    struct page_info *pg = NULL;
+    struct dma_pte *page = NULL, *pte = NULL;
+    u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -1642,15 +1599,15 @@ int intel_iommu_map_page(
         return 0;
 #endif
 
-    pg = addr_to_dma_page(d, (paddr_t)gfn << PAGE_SHIFT_4K);
-    if ( !pg )
+    pg_maddr = addr_to_dma_page_maddr(d, gfn << PAGE_SHIFT_4K);
+    if ( pg_maddr == 0 )
         return -ENOMEM;
-    pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
-    pte += gfn & LEVEL_MASK;
+    page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+    pte = page + (gfn & LEVEL_MASK);
     dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
     iommu_flush_cache_entry(iommu, pte);
-    unmap_domain_page(pte);
+    unmap_vtd_domain_page(page);
 
     for_each_drhd_unit ( drhd )
     {
@@ -1690,9 +1647,9 @@ int iommu_page_mapping(struct domain *do
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     unsigned long start_pfn, end_pfn;
-    struct dma_pte *pte = NULL;
+    struct dma_pte *page = NULL, *pte = NULL;
     int index;
-    struct page_info *pg = NULL;
+    u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -1705,15 +1662,15 @@ int iommu_page_mapping(struct domain *do
     index = 0;
     while ( start_pfn < end_pfn )
     {
-        pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
-        if ( !pg )
+        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index);
+        if ( pg_maddr == 0 )
             return -ENOMEM;
-        pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
-        pte += start_pfn & LEVEL_MASK;
+        page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+        pte = page + (start_pfn & LEVEL_MASK);
         dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
         iommu_flush_cache_entry(iommu, pte);
-        unmap_domain_page(pte);
+        unmap_vtd_domain_page(page);
         start_pfn++;
         index++;
     }
@@ -2050,159 +2007,6 @@ int intel_iommu_assign_device(struct dom
 
     return ret;
 }
-
-void iommu_set_pgd(struct domain *d)
-{
-    struct hvm_iommu *hd  = domain_hvm_iommu(d);
-    unsigned long p2m_table;
-
-    if ( hd->pgd )
-    {
-        gdprintk(XENLOG_INFO VTDPREFIX,
-                 "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
-        hd->pgd = NULL;
-    }
-    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
-
-    if ( paging_mode_hap(d) )
-    {
-        int level = agaw_to_level(hd->agaw);
-        struct dma_pte *dpte = NULL;
-        mfn_t pgd_mfn;
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            dpte = map_domain_page(p2m_table);
-            if ( !dma_pte_present(*dpte) )
-            {
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "iommu_set_pgd: second level wasn't there\n");
-                unmap_domain_page(dpte);
-                return;
-            }
-            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
-            unmap_domain_page(dpte);
-            hd->pgd = maddr_to_virt(pagetable_get_paddr(
-                pagetable_from_mfn(pgd_mfn)));
-            break;
-        case VTD_PAGE_TABLE_LEVEL_4:
-            pgd_mfn = _mfn(p2m_table);
-            hd->pgd = maddr_to_virt(pagetable_get_paddr(
-                pagetable_from_mfn(pgd_mfn)));
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-    }
-    else
-    {
-#if CONFIG_PAGING_LEVELS == 3
-        int level = agaw_to_level(hd->agaw);
-        struct dma_pte *pmd = NULL;
-        struct dma_pte *pgd = NULL;
-        struct dma_pte *pte = NULL;
-        l3_pgentry_t *l3e;
-        unsigned long flags;
-        int i;
-
-        spin_lock_irqsave(&hd->mapping_lock, flags);
-        if ( !hd->pgd )
-        {
-            pgd = (struct dma_pte *)alloc_xenheap_page();
-            if ( !pgd )
-            {
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "Allocate pgd memory failed!\n");
-                return;
-            }
-            memset(pgd, 0, PAGE_SIZE);
-            hd->pgd = pgd;
-       }
-
-        l3e = map_domain_page(p2m_table);
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
-            /* We only support 8 entries for the PAE L3 p2m table */
-            for ( i = 0; i < 8 ; i++ )
-            {
-                /* Don't create new L2 entry, use ones from p2m table */
-                pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
-            }
-            break;
-
-        case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
-            /* We allocate one more page for the top vtd page table. */
-            pmd = (struct dma_pte *)alloc_xenheap_page();
-            if ( !pmd )
-            {
-                unmap_domain_page(l3e);
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "Allocate pmd memory failed!\n");
-                return;
-            }
-            memset((u8*)pmd, 0, PAGE_SIZE);
-            pte = &pgd[0];
-            dma_set_pte_addr(*pte, virt_to_maddr(pmd));
-            dma_set_pte_readable(*pte);
-            dma_set_pte_writable(*pte);
-
-            for ( i = 0; i < 8; i++ )
-            {
-                /* Don't create new L2 entry, use ones from p2m table */
-                pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
-            }
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-        unmap_domain_page(l3e);
-        spin_unlock_irqrestore(&hd->mapping_lock, flags);
-#elif CONFIG_PAGING_LEVELS == 4
-        int level = agaw_to_level(hd->agaw);
-        l3_pgentry_t *l3e;
-        mfn_t pgd_mfn;
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            l3e = map_domain_page(p2m_table);
-            if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
-            {
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "iommu_set_pgd: second level wasn't there\n");
-                unmap_domain_page(l3e);
-                return;
-            }
-            pgd_mfn = _mfn(l3e_get_pfn(*l3e));
-            unmap_domain_page(l3e);
-            hd->pgd = maddr_to_virt(pagetable_get_paddr(
-                pagetable_from_mfn(pgd_mfn)));
-            break;
-
-        case VTD_PAGE_TABLE_LEVEL_4:
-            pgd_mfn = _mfn(p2m_table);
-            hd->pgd = maddr_to_virt(pagetable_get_paddr(
-                pagetable_from_mfn(pgd_mfn)));
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-#endif
-    }
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
-}
-
 
 u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
 int iommu_suspend(void)
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h       Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h       Thu Apr 10 09:22:38 2008 +0100
@@ -425,7 +425,7 @@ extern struct list_head acpi_ioapic_unit
 extern struct list_head acpi_ioapic_units;
 
 struct qi_ctrl {
-    struct qinval_entry *qinval;         /* queue invalidation page */
+    u64 qinval_maddr;  /* queue invalidation page machine address */
     int qinval_index;                    /* queue invalidation index */
     spinlock_t qinval_lock;      /* lock for queue invalidation page */
     spinlock_t qinval_poll_lock; /* lock for queue invalidation poll addr */
@@ -433,7 +433,7 @@ struct qi_ctrl {
 };
 
 struct ir_ctrl {
-    struct iremap_entry *iremap; /* interrupt remap table */
+    u64 iremap_maddr;            /* interrupt remap table machine address */
     int iremap_index;            /* interrupt remap index */
     spinlock_t iremap_lock;      /* lock for irq remappping table */
 };
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/qinval.c      Thu Apr 10 09:22:38 2008 +0100
@@ -63,13 +63,14 @@ static int gen_cc_inv_dsc(struct iommu *
 static int gen_cc_inv_dsc(struct iommu *iommu, int index,
     u16 did, u16 source_id, u8 function_mask, u8 granu)
 {
-    u64 *ptr64;
-    unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
-    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
-    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
-    qinval_entry = &qi_ctrl->qinval[index];
+    unsigned long flags;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
     qinval_entry->q.cc_inv_dsc.lo.granu = granu;
     qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
@@ -78,9 +79,10 @@ static int gen_cc_inv_dsc(struct iommu *
     qinval_entry->q.cc_inv_dsc.lo.fm = function_mask;
     qinval_entry->q.cc_inv_dsc.lo.res_2 = 0;
     qinval_entry->q.cc_inv_dsc.hi.res = 0;
-    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
-
-    ptr64 = (u64 *)qinval_entry;
+
+    unmap_vtd_domain_page(qinval_entries);
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+
     return 0;
 }
 
@@ -93,7 +95,7 @@ int queue_invalidate_context(struct iomm
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     index = qinval_next_index(iommu);
-    if (index == -1)
+    if ( index == -1 )
         return -EBUSY;
     ret = gen_cc_inv_dsc(iommu, index, did, source_id,
                          function_mask, granu);
@@ -106,14 +108,16 @@ static int gen_iotlb_inv_dsc(struct iomm
     u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
 {
     unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
     if ( index == -1 )
         return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
-    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
     qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
     qinval_entry->q.iotlb_inv_dsc.lo.dr = 0;
@@ -127,6 +131,7 @@ static int gen_iotlb_inv_dsc(struct iomm
     qinval_entry->q.iotlb_inv_dsc.hi.res_1 = 0;
     qinval_entry->q.iotlb_inv_dsc.hi.addr = addr;
 
+    unmap_vtd_domain_page(qinval_entries);
     spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
     return 0;
 }
@@ -151,15 +156,16 @@ static int gen_wait_dsc(struct iommu *io
 static int gen_wait_dsc(struct iommu *iommu, int index,
     u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
 {
-    u64 *ptr64;
-    unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
+    unsigned long flags;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
     if ( index == -1 )
         return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
-    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
     qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
     qinval_entry->q.inv_wait_dsc.lo.sw = sw;
@@ -168,8 +174,8 @@ static int gen_wait_dsc(struct iommu *io
     qinval_entry->q.inv_wait_dsc.lo.sdata = sdata;
     qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
     qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(saddr) >> 2;
-    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
-    ptr64 = (u64 *)qinval_entry;
+    unmap_vtd_domain_page(qinval_entries);
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
     return 0;
 }
 
@@ -185,7 +191,7 @@ static int queue_invalidate_wait(struct 
     spin_lock_irqsave(&qi_ctrl->qinval_poll_lock, flags);
     spin_lock_irqsave(&iommu->register_lock, flags);
     index = qinval_next_index(iommu);
-    if (*saddr == 1)
+    if ( *saddr == 1 )
         *saddr = 0;
     ret = gen_wait_dsc(iommu, index, iflag, sw, fn, sdata, saddr);
     ret |= qinval_update_qtail(iommu, index);
@@ -196,8 +202,10 @@ static int queue_invalidate_wait(struct 
     {
         /* In case all wait descriptor writes to same addr with same data */
         start_time = jiffies;
-        while ( *saddr != 1 ) {
-            if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) {
+        while ( *saddr != 1 )
+        {
+            if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
+            {
                 print_qi_regs(iommu);
                 panic("queue invalidate wait descriptor was not executed\n");
             }
@@ -213,7 +221,7 @@ int invalidate_sync(struct iommu *iommu)
     int ret = -1;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
-    if (qi_ctrl->qinval)
+    if ( qi_ctrl->qinval_maddr == 0 )
     {
         ret = queue_invalidate_wait(iommu,
             0, 1, 1, 1, &qi_ctrl->qinval_poll_status);
@@ -226,14 +234,16 @@ static int gen_dev_iotlb_inv_dsc(struct 
     u32 max_invs_pend, u16 sid, u16 size, u64 addr)
 {
     unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
     if ( index == -1 )
         return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
-    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
@@ -244,6 +254,7 @@ static int gen_dev_iotlb_inv_dsc(struct 
     qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
     qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr;
 
+    unmap_vtd_domain_page(qinval_entries);
     spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
     return 0;
 }
@@ -268,14 +279,16 @@ static int gen_iec_inv_dsc(struct iommu 
     u8 granu, u8 im, u16 iidx)
 {
     unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
     if ( index == -1 )
         return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
-    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
     qinval_entry->q.iec_inv_dsc.lo.granu = granu;
     qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
@@ -284,6 +297,7 @@ static int gen_iec_inv_dsc(struct iommu 
     qinval_entry->q.iec_inv_dsc.lo.res_2 = 0;
     qinval_entry->q.iec_inv_dsc.hi.res = 0;
 
+    unmap_vtd_domain_page(qinval_entries);
     spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
     return 0;
 }
@@ -349,7 +363,7 @@ static int flush_context_qi(
             did = 0;
     }
 
-    if (qi_ctrl->qinval)
+    if ( qi_ctrl->qinval_maddr != 0 )
     {
         ret = queue_invalidate_context(iommu, did, sid, fm,
                                        type >> DMA_CCMD_INVL_GRANU_OFFSET);
@@ -382,7 +396,8 @@ static int flush_iotlb_qi(
             did = 0;
     }
 
-    if (qi_ctrl->qinval) {
+    if ( qi_ctrl->qinval_maddr != 0 )
+    {
         /* use queued invalidation */
         if (cap_write_drain(iommu->cap))
             dw = 1;
@@ -400,7 +415,6 @@ int qinval_setup(struct iommu *iommu)
 int qinval_setup(struct iommu *iommu)
 {
     unsigned long start_time;
-    u64 paddr;
     u32 status = 0;
     struct qi_ctrl *qi_ctrl;
     struct iommu_flush *flush;
@@ -411,15 +425,14 @@ int qinval_setup(struct iommu *iommu)
     if ( !ecap_queued_inval(iommu->ecap) )
         return -ENODEV;
 
-    if (qi_ctrl->qinval == NULL) {
-        qi_ctrl->qinval = alloc_xenheap_page();
-        if (qi_ctrl->qinval == NULL)
-            panic("Cannot allocate memory for qi_ctrl->qinval\n");
-        memset((u8*)qi_ctrl->qinval, 0, PAGE_SIZE_4K);
+    if ( qi_ctrl->qinval_maddr == 0 )
+    {
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr();
+        if ( qi_ctrl->qinval_maddr == 0 )
+            panic("Cannot allocate memory for qi_ctrl->qinval_maddr\n");
         flush->context = flush_context_qi;
         flush->iotlb = flush_iotlb_qi;
     }
-    paddr = virt_to_maddr(qi_ctrl->qinval);
 
     /* Setup Invalidation Queue Address(IQA) register with the
      * address of the page we just allocated.  QS field at
@@ -428,7 +441,7 @@ int qinval_setup(struct iommu *iommu)
      * registers are automatically reset to 0 with write
      * to IQA register.
      */
-    dmar_writeq(iommu->reg, DMAR_IQA_REG, paddr);
+    dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
 
     /* enable queued invalidation hardware */
     iommu->gcmd |= DMA_GCMD_QIE;
@@ -436,11 +449,12 @@ int qinval_setup(struct iommu *iommu)
 
     /* Make sure hardware complete it */
     start_time = jiffies;
-    while (1) {
+    while ( 1 )
+    {
         status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
-        if (status & DMA_GSTS_QIES)
+        if ( status & DMA_GSTS_QIES )
             break;
-        if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+        if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
             panic("Cannot set QIE field for queue invalidation\n");
         cpu_relax();
     }
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c       Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/utils.c       Thu Apr 10 09:22:38 2008 +0100
@@ -25,6 +25,7 @@
 #include "../pci-direct.h"
 #include "../pci_regs.h"
 #include "msi.h"
+#include "vtd.h"
 
 #define INTEL   0x8086
 #define SEABURG 0x4000
@@ -243,7 +244,7 @@ u32 get_level_index(unsigned long gmfn, 
 }
 
 void print_vtd_entries(
-    struct domain *d, 
+    struct domain *d,
     struct iommu *iommu,
     int bus, int devfn,
     unsigned long gmfn)
@@ -261,37 +262,40 @@ void print_vtd_entries(
     printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x gmfn = %lx\n",
            d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
 
-    if ( hd->pgd == NULL )
-    {
-        printk("    hg->pgd == NULL\n");
+    if ( hd->pgd_maddr == 0 )
+    {
+        printk("    hd->pgd_maddr == 0\n");
         return;
     }
-    printk("    d->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
-           hd->pgd, virt_to_maddr(hd->pgd));
+    printk("    hd->pgd_maddr = %"PRIx64"\n", hd->pgd_maddr);
 
     for_each_drhd_unit ( drhd )
     {
         printk("---- print_vtd_entries %d ----\n", i++);
 
-        root_entry = iommu->root_entry;
-        if ( root_entry == NULL )
-        {
-            printk("    root_entry == NULL\n");
-            continue;
-        }
-
+        if ( iommu->root_maddr == 0 )
+        {
+            printk("    iommu->root_maddr = 0\n");
+            continue;
+        }
+
+        root_entry =
+            (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+ 
         printk("    root_entry = %p\n", root_entry);
         printk("    root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
         if ( !root_present(root_entry[bus]) )
         {
+            unmap_vtd_domain_page(root_entry);
             printk("    root_entry[%x] not present\n", bus);
             continue;
         }
 
         ctxt_entry =
-            maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
+            (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
         if ( ctxt_entry == NULL )
         {
+            unmap_vtd_domain_page(root_entry);
             printk("    ctxt_entry == NULL\n");
             continue;
         }
@@ -301,6 +305,8 @@ void print_vtd_entries(
                devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
         if ( !context_present(ctxt_entry[devfn]) )
         {
+            unmap_vtd_domain_page(ctxt_entry);
+            unmap_vtd_domain_page(root_entry);
             printk("    ctxt_entry[%x] not present\n", devfn);
             continue;
         }
@@ -308,6 +314,8 @@ void print_vtd_entries(
         if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
              level != VTD_PAGE_TABLE_LEVEL_4)
         {
+            unmap_vtd_domain_page(ctxt_entry);
+            unmap_vtd_domain_page(root_entry);
             printk("Unsupported VTD page table level (%d)!\n", level);
             continue;
         }
@@ -319,6 +327,8 @@ void print_vtd_entries(
             printk("    l%d = %p\n", level, l);
             if ( l == NULL )
             {
+                unmap_vtd_domain_page(ctxt_entry);
+                unmap_vtd_domain_page(root_entry);
                 printk("    l%d == NULL\n", level);
                 break;
             }
@@ -329,6 +339,8 @@ void print_vtd_entries(
             pte.val = l[l_index];
             if ( !dma_pte_present(pte) )
             {
+                unmap_vtd_domain_page(ctxt_entry);
+                unmap_vtd_domain_page(root_entry);
                 printk("    l%d[%x] not present\n", level, l_index);
                 break;
             }
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Thu Apr 10 09:22:38 2008 +0100
@@ -20,6 +20,7 @@
 
 #include <xen/sched.h>
 #include <xen/domain_page.h>
+#include <asm/paging.h>
 #include <xen/iommu.h>
 #include "../iommu.h"
 #include "../dmar.h"
@@ -124,3 +125,179 @@ void hvm_dpci_isairq_eoi(struct domain *
         }
     }
 }
+
+void iommu_set_pgd(struct domain *d)
+{
+    struct hvm_iommu *hd  = domain_hvm_iommu(d);
+    unsigned long p2m_table;
+    int level = agaw_to_level(hd->agaw);
+    l3_pgentry_t *l3e;
+
+    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
+
+    if ( paging_mode_hap(d) )
+    {
+        int level = agaw_to_level(hd->agaw);
+        struct dma_pte *dpte = NULL;
+        mfn_t pgd_mfn;
+
+        switch ( level )
+        {
+        case VTD_PAGE_TABLE_LEVEL_3:
+            dpte = map_domain_page(p2m_table);
+            if ( !dma_pte_present(*dpte) )
+            {
+                gdprintk(XENLOG_ERR VTDPREFIX,
+                         "iommu_set_pgd: second level wasn't there\n");
+                unmap_domain_page(dpte);
+                return;
+            }
+            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
+            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            unmap_domain_page(dpte);
+            break;
+        case VTD_PAGE_TABLE_LEVEL_4:
+            pgd_mfn = _mfn(p2m_table);
+            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            break;
+        default:
+            gdprintk(XENLOG_ERR VTDPREFIX,
+                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+            break;
+        }
+    }
+    else
+    {
+#if CONFIG_PAGING_LEVELS == 3
+        struct dma_pte *pte = NULL, *pgd_vaddr = NULL, *pmd_vaddr = NULL;
+        int i;
+        u64 pmd_maddr;
+        unsigned long flags;
+
+        spin_lock_irqsave(&hd->mapping_lock, flags);
+        hd->pgd_maddr = alloc_pgtable_maddr();
+        if ( hd->pgd_maddr == 0 )
+        {
+            spin_unlock_irqrestore(&hd->mapping_lock, flags);
+            gdprintk(XENLOG_ERR VTDPREFIX,
+                     "Allocate pgd memory failed!\n");
+            return;
+        }
+
+        pgd_vaddr = map_vtd_domain_page(hd->pgd_maddr);
+        l3e = map_domain_page(p2m_table);
+        switch ( level )
+        {
+        case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
+            /* We only support 8 entries for the PAE L3 p2m table */
+            for ( i = 0; i < 8 ; i++ )
+            {
+                /* Don't create new L2 entry, use ones from p2m table */
+                pgd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
+            }
+            break;
+
+        case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
+            /* We allocate one more page for the top vtd page table. */
+            pmd_maddr = alloc_pgtable_maddr();
+            if ( pmd_maddr == 0 )
+            {
+                unmap_vtd_domain_page(pgd_vaddr);
+                unmap_domain_page(l3e);
+                spin_unlock_irqrestore(&hd->mapping_lock, flags);
+                gdprintk(XENLOG_ERR VTDPREFIX,
+                         "Allocate pmd memory failed!\n");
+                return;
+            }
+
+            pte = &pgd_vaddr[0];
+            dma_set_pte_addr(*pte, pmd_maddr);
+            dma_set_pte_readable(*pte);
+            dma_set_pte_writable(*pte);
+
+            pmd_vaddr = map_vtd_domain_page(pmd_maddr);
+            for ( i = 0; i < 8; i++ )
+            {
+                /* Don't create new L2 entry, use ones from p2m table */
+                pmd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
+            }
+
+            unmap_vtd_domain_page(pmd_vaddr);
+            break;
+        default:
+            gdprintk(XENLOG_ERR VTDPREFIX,
+                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+            break;
+        }
+
+        unmap_vtd_domain_page(pgd_vaddr);
+        unmap_domain_page(l3e);
+        spin_unlock_irqrestore(&hd->mapping_lock, flags);
+
+#elif CONFIG_PAGING_LEVELS == 4
+        mfn_t pgd_mfn;
+
+        switch ( level )
+        {
+        case VTD_PAGE_TABLE_LEVEL_3:
+            l3e = map_domain_page(p2m_table);
+            if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
+            {
+                gdprintk(XENLOG_ERR VTDPREFIX,
+                         "iommu_set_pgd: second level wasn't there\n");
+                unmap_domain_page(l3e);
+                return;
+            }
+
+            pgd_mfn = _mfn(l3e_get_pfn(*l3e));
+            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            unmap_domain_page(l3e);
+            break;
+        case VTD_PAGE_TABLE_LEVEL_4:
+            pgd_mfn = _mfn(p2m_table);
+            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            break;
+        default:
+            gdprintk(XENLOG_ERR VTDPREFIX,
+                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+            break;
+        }
+#endif
+    }
+}
+
+void iommu_free_pgd(struct domain *d)
+{
+#if CONFIG_PAGING_LEVELS == 3
+    struct hvm_iommu *hd  = domain_hvm_iommu(d);
+    int level = agaw_to_level(hd->agaw);
+    struct dma_pte *pgd_vaddr = NULL;
+
+    switch ( level )
+    {
+    case VTD_PAGE_TABLE_LEVEL_3:
+        if ( hd->pgd_maddr != 0 )
+        {
+            free_pgtable_maddr(hd->pgd_maddr);
+            hd->pgd_maddr = 0;
+        }
+        break;
+    case VTD_PAGE_TABLE_LEVEL_4:
+        if ( hd->pgd_maddr != 0 )
+        {
+            pgd_vaddr = (struct dma_pte*)map_vtd_domain_page(hd->pgd_maddr);
+            if ( pgd_vaddr[0].val != 0 )
+                free_pgtable_maddr(pgd_vaddr[0].val);
+            unmap_vtd_domain_page(pgd_vaddr);
+            free_pgtable_maddr(hd->pgd_maddr);
+            hd->pgd_maddr = 0;
+        }
+        break;
+    default:
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                 "Unsupported p2m table sharing level!\n");
+        break;
+    }
+#endif
+}
+
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/include/xen/hvm/iommu.h
--- a/xen/include/xen/hvm/iommu.h       Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/include/xen/hvm/iommu.h       Thu Apr 10 09:22:38 2008 +0100
@@ -38,7 +38,7 @@ struct hvm_iommu {
 struct hvm_iommu {
     spinlock_t iommu_list_lock;    /* protect iommu specific lists */
     struct list_head pdev_list;    /* direct accessed pci devices */
-    struct dma_pte *pgd;           /* io page directory root */
+    u64 pgd_maddr;                 /* io page directory machine address */
     spinlock_t mapping_lock;       /* io page table lock */
     int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
     struct list_head g2m_ioport_list;  /* guest to machine ioport mapping */
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/include/xen/iommu.h   Thu Apr 10 09:22:38 2008 +0100
@@ -67,7 +67,7 @@ struct iommu {
     u64        ecap;
     spinlock_t lock; /* protect context, domain ids */
     spinlock_t register_lock; /* protect iommu register handling */
-    struct root_entry *root_entry; /* virtual address */
+    u64 root_maddr; /* root entry machine address */
     unsigned int vector;
     struct intel_iommu *intel;
 };
@@ -85,6 +85,7 @@ int iommu_unmap_page(struct domain *d, u
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
 void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry);
 void iommu_set_pgd(struct domain *d);
+void iommu_free_pgd(struct domain *d);
 void iommu_domain_teardown(struct domain *d);
 int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
 int dpci_ioport_intercept(ioreq_t *p);

