[Xen-changelog] [xen-3.2-testing] vtd: Enable queued invalidation method

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-3.2-testing] vtd: Enable queued invalidation method if such HW support is
From: "Xen patchbot-3.2-testing" <patchbot-3.2-testing@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 23 Jan 2008 01:12:05 -0800
Delivery-date: Wed, 23 Jan 2008 01:19:45 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1201001068 0
# Node ID 6de1c481bc7fb280038a13f15dbfba2c2ca38cee
# Parent  bf534df13095164c7b5461ee34f566efed3d6eb4
vtd: Enable queued invalidation method if such HW support is
detected.  Otherwise, the register-based invalidation method is used.

Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
xen-unstable changeset:   16775:cc5bb500df5feda0755b865134c47f3fe9cec46d
xen-unstable date:        Tue Jan 22 09:48:51 2008 +0000
---
 xen/arch/x86/hvm/vmx/vtd/Makefile      |    1 
 xen/arch/x86/hvm/vmx/vtd/extern.h      |   55 +++
 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c |  165 ++++++++---
 xen/arch/x86/hvm/vmx/vtd/qinval.c      |  456 +++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vtd/vtd.h         |   54 +++
 xen/include/asm-x86/iommu.h            |    4 
 6 files changed, 686 insertions(+), 49 deletions(-)
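
The heart of the change is an indirection layer for the two VT-d flush
operations: each IOMMU now carries a struct iommu_flush whose context and
iotlb function pointers are set to the register-based implementations in
init_vtd_hw() and overridden by qinval_setup() when the ECAP register
advertises queued invalidation.  The standalone sketch below illustrates
only that dispatch pattern; the struct layout is simplified, the ECAP bit
position is assumed from the VT-d specification, and the stub bodies stand
in for the real register programming.

/* Minimal sketch (not the real Xen code) of the per-IOMMU flush dispatch
 * this patch introduces. */
#include <stdio.h>

struct iommu;

struct iommu_flush {
    int (*context)(struct iommu *iommu, int domain_id);
    int (*iotlb)(struct iommu *iommu, int domain_id);
};

struct iommu {
    unsigned long long ecap;   /* copy of the extended capability register */
    struct iommu_flush flush;
};

/* ECAP.QI is assumed here to live at bit 1, per the VT-d specification. */
#define ecap_queued_inval(e) (((e) >> 1) & 1)

static int flush_context_reg(struct iommu *i, int did)
{ printf("CCMD register flush, domain %d\n", did); return 0; }
static int flush_iotlb_reg(struct iommu *i, int did)
{ printf("IOTLB register flush, domain %d\n", did); return 0; }
static int flush_context_qi(struct iommu *i, int did)
{ printf("queued context-cache invalidation, domain %d\n", did); return 0; }
static int flush_iotlb_qi(struct iommu *i, int did)
{ printf("queued IOTLB invalidation, domain %d\n", did); return 0; }

/* Mirrors init_vtd_hw() + qinval_setup(): register method by default,
 * queued invalidation only when the hardware advertises support. */
static void setup_flush(struct iommu *iommu)
{
    iommu->flush.context = flush_context_reg;
    iommu->flush.iotlb   = flush_iotlb_reg;
    if ( ecap_queued_inval(iommu->ecap) )
    {
        iommu->flush.context = flush_context_qi;
        iommu->flush.iotlb   = flush_iotlb_qi;
    }
}

int main(void)
{
    struct iommu legacy = { .ecap = 0 };
    struct iommu qi_capable = { .ecap = 1ULL << 1 };

    setup_flush(&legacy);
    setup_flush(&qi_capable);

    legacy.flush.context(&legacy, 0);        /* register-based path */
    qi_capable.flush.iotlb(&qi_capable, 0);  /* queued path */
    return 0;
}

The benefit is that callers such as iommu_flush_context_global() and
iommu_flush_iotlb_psi() stay unchanged while the invalidation mechanism is
chosen per IOMMU at initialisation time.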

diff -r bf534df13095 -r 6de1c481bc7f xen/arch/x86/hvm/vmx/vtd/Makefile
--- a/xen/arch/x86/hvm/vmx/vtd/Makefile Tue Jan 22 11:23:33 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/Makefile Tue Jan 22 11:24:28 2008 +0000
@@ -2,3 +2,4 @@ obj-y += dmar.o
 obj-y += dmar.o
 obj-y += utils.o
 obj-y += io.o
+obj-y += qinval.o
diff -r bf534df13095 -r 6de1c481bc7f xen/arch/x86/hvm/vmx/vtd/extern.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/extern.h Tue Jan 22 11:24:28 2008 +0000
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx>
+ * Copyright (C) Weidong Han <weidong.han@xxxxxxxxx>
+ */
+
+#ifndef _VTD_EXTERN_H_
+#define _VTD_EXTERN_H_
+
+#include "dmar.h"
+
+extern int iommu_setup_done;
+extern int vtd2_thurley_enabled;
+extern int vtd2_qinval_enabled;
+
+extern spinlock_t ioapic_lock;
+extern struct qi_ctrl *qi_ctrl;
+extern struct ir_ctrl *ir_ctrl;
+
+void print_iommu_regs(struct acpi_drhd_unit *drhd);
+void print_vtd_entries(struct domain *d, struct iommu *iommu,
+                       int bus, int devfn, unsigned long gmfn);
+
+int qinval_setup(struct iommu *iommu);
+int queue_invalidate_context(struct iommu *iommu,
+    u16 did, u16 source_id, u8 function_mask, u8 granu);
+int queue_invalidate_iotlb(struct iommu *iommu,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr);
+int queue_invalidate_iec(struct iommu *iommu,
+    u8 granu, u8 im, u16 iidx);
+int invalidate_sync(struct iommu *iommu);
+int iommu_flush_iec_global(struct iommu *iommu);
+int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
+void gsi_remapping(unsigned int gsi);
+void print_iommu_regs(struct acpi_drhd_unit *drhd);
+int vtd_hw_check(void);
+struct iommu * ioapic_to_iommu(unsigned int apic_id);
+struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id);
+void clear_fault_bits(struct iommu *iommu);
+
+#endif // _VTD_EXTERN_H_
diff -r bf534df13095 -r 6de1c481bc7f xen/arch/x86/hvm/vmx/vtd/intel-iommu.c
--- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Tue Jan 22 11:23:33 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Tue Jan 22 11:24:28 2008 +0000
@@ -34,12 +34,9 @@
 #include "pci-direct.h"
 #include "pci_regs.h"
 #include "msi.h"
+#include "extern.h"
 
 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
-
-extern void print_iommu_regs(struct acpi_drhd_unit *drhd);
-extern void print_vtd_entries(struct domain *d, int bus, int devfn,
-                              unsigned long gmfn);
 
 static spinlock_t domid_bitmap_lock;    /* protect domain id bitmap */
 static int domid_bitmap_size;           /* domain id bitmap size in bit */
@@ -304,11 +301,12 @@ static void iommu_flush_write_buffer(str
 }
 
 /* return value determine if we need a write buffer flush */
-static int __iommu_flush_context(
-    struct iommu *iommu,
+static int flush_context_reg(
+    void *_iommu,
     u16 did, u16 source_id, u8 function_mask, u64 type,
     int non_present_entry_flush)
 {
+    struct iommu *iommu = (struct iommu *) _iommu;
     u64 val = 0;
     unsigned long flag;
     unsigned long start_time;
@@ -367,14 +365,16 @@ static int inline iommu_flush_context_gl
 static int inline iommu_flush_context_global(
     struct iommu *iommu, int non_present_entry_flush)
 {
-    return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
                                  non_present_entry_flush);
 }
 
 static int inline iommu_flush_context_domain(
     struct iommu *iommu, u16 did, int non_present_entry_flush)
 {
-    return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
                                  non_present_entry_flush);
 }
 
@@ -382,16 +382,18 @@ static int inline iommu_flush_context_de
     struct iommu *iommu, u16 did, u16 source_id,
     u8 function_mask, int non_present_entry_flush)
 {
-    return __iommu_flush_context(iommu, did, source_id, function_mask,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->context(iommu, did, source_id, function_mask,
                                  DMA_CCMD_DEVICE_INVL,
                                  non_present_entry_flush);
 }
 
 /* return value determine if we need a write buffer flush */
-static int __iommu_flush_iotlb(struct iommu *iommu, u16 did,
+static int flush_iotlb_reg(void *_iommu, u16 did,
                                u64 addr, unsigned int size_order, u64 type,
                                int non_present_entry_flush)
 {
+    struct iommu *iommu = (struct iommu *) _iommu;
     int tlb_offset = ecap_iotlb_offset(iommu->ecap);
     u64 val = 0, val_iva = 0;
     unsigned long flag;
@@ -467,14 +469,16 @@ static int inline iommu_flush_iotlb_glob
 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
                                            int non_present_entry_flush)
 {
-    return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
                                non_present_entry_flush);
 }
 
 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
                                         int non_present_entry_flush)
 {
-    return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
                                non_present_entry_flush);
 }
 
@@ -498,6 +502,7 @@ static int inline iommu_flush_iotlb_psi(
     u64 addr, unsigned int pages, int non_present_entry_flush)
 {
     unsigned int align;
+    struct iommu_flush *flush = iommu_get_flush(iommu);
 
     BUG_ON(addr & (~PAGE_MASK_4K));
     BUG_ON(pages == 0);
@@ -520,7 +525,7 @@ static int inline iommu_flush_iotlb_psi(
     addr >>= PAGE_SHIFT_4K + align;
     addr <<= PAGE_SHIFT_4K + align;
 
-    return __iommu_flush_iotlb(iommu, did, addr, align,
+    return flush->iotlb(iommu, did, addr, align,
                                DMA_TLB_PSI_FLUSH, non_present_entry_flush);
 }
 
@@ -701,7 +706,7 @@ static int iommu_enable_translation(stru
     unsigned long flags;
 
     dprintk(XENLOG_INFO VTDPREFIX,
-            "iommu_enable_translation: enabling vt-d translation\n");
+            "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
     spin_lock_irqsave(&iommu->register_lock, flags);
     iommu->gcmd |= DMA_GCMD_TE;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
@@ -746,14 +751,47 @@ static int iommu_page_fault_do_one(struc
                                    u8 fault_reason, u16 source_id, u32 addr)
 {
     dprintk(XENLOG_WARNING VTDPREFIX,
-            "iommu_page_fault:%s: DEVICE %x:%x.%x addr %x REASON %x\n",
-            (type ? "DMA Read" : "DMA Write"),
-            (source_id >> 8), PCI_SLOT(source_id & 0xFF),
-            PCI_FUNC(source_id & 0xFF), addr, fault_reason);
-
-    print_vtd_entries(current->domain, (source_id >> 8),(source_id & 0xff),
-                      (addr >> PAGE_SHIFT)); 
+            "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
+            (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
+            PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
+            fault_reason, iommu->reg);
+
+    if (fault_reason < 0x20) 
+        print_vtd_entries(current->domain, iommu, (source_id >> 8),
+                          (source_id & 0xff), (addr >> PAGE_SHIFT)); 
+
     return 0;
+}
+
+static void iommu_fault_status(u32 fault_status)
+{
+    if (fault_status & DMA_FSTS_PFO)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Fault Overflow\n");
+    else
+    if (fault_status & DMA_FSTS_PPF)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Primary Pending Fault\n");
+    else
+    if (fault_status & DMA_FSTS_AFO)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Advanced Fault Overflow\n");
+    else
+    if (fault_status & DMA_FSTS_APF)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Advanced Pending Fault\n");
+    else
+    if (fault_status & DMA_FSTS_IQE)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Invalidation Queue Error\n");
+    else
+    if (fault_status & DMA_FSTS_ICE)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Invalidation Completion Error\n");
+    else
+    if (fault_status & DMA_FSTS_ITE)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Invalidation Time-out Error\n");
 }
 
 #define PRIMARY_FAULT_REG_LEN (16)
@@ -771,6 +809,8 @@ static void iommu_page_fault(int vector,
     spin_lock_irqsave(&iommu->register_lock, flags);
     fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+    iommu_fault_status(fault_status);
 
     /* FIXME: ignore advanced fault log */
     if ( !(fault_status & DMA_FSTS_PPF) )
@@ -936,6 +976,8 @@ struct iommu *iommu_alloc(void *hw_data)
 {
     struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
     struct iommu *iommu;
+    struct qi_ctrl *qi_ctrl;
+    struct ir_ctrl *ir_ctrl;
 
     if ( nr_iommus > MAX_IOMMUS )
     {
@@ -951,9 +993,10 @@ struct iommu *iommu_alloc(void *hw_data)
 
     set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
     iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
-    dprintk(XENLOG_INFO VTDPREFIX,
-            "iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
-            iommu->reg, drhd->address);
+
+    printk("iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
+           iommu->reg, drhd->address);
+
     nr_iommus++;
 
     if ( !iommu->reg )
@@ -965,8 +1008,18 @@ struct iommu *iommu_alloc(void *hw_data)
     iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
     iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
 
+    printk("iommu_alloc: cap = %"PRIx64"\n",iommu->cap);
+    printk("iommu_alloc: ecap = %"PRIx64"\n", iommu->ecap);
+
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
+
+    qi_ctrl = iommu_qi_ctrl(iommu);
+    spin_lock_init(&qi_ctrl->qinval_lock);
+    spin_lock_init(&qi_ctrl->qinval_poll_lock);
+
+    ir_ctrl = iommu_ir_ctrl(iommu);
+    spin_lock_init(&ir_ctrl->iremap_lock);
 
     drhd->iommu = iommu;
     return iommu;
@@ -1071,8 +1124,10 @@ static int domain_context_mapping_one(
 
     if ( ecap_pass_thru(iommu->ecap) )
         context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+#ifdef CONTEXT_PASSTHRU
     else
     {
+#endif
         if ( !hd->pgd )
         {
             struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
@@ -1087,7 +1142,9 @@ static int domain_context_mapping_one(
  
         context_set_address_root(*context, virt_to_maddr(hd->pgd));
         context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
-    }
+#ifdef CONTEXT_PASSTHRU
+    }
+#endif
 
     context_set_fault_enable(*context);
     context_set_present(*context);
@@ -1462,7 +1519,6 @@ void iommu_domain_teardown(struct domain
                 if ( pgd[0].val != 0 )
                     free_xenheap_page((void*)maddr_to_virt(
                         dma_pte_addr(pgd[0])));
-
                 free_xenheap_page((void *)hd->pgd);
             }
             break;
@@ -1503,9 +1559,11 @@ int iommu_map_page(struct domain *d, pad
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
 
+#ifdef CONTEXT_PASSTHRU
     /* do nothing if dom0 and iommu supports pass thru */
     if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
+#endif
 
     pg = addr_to_dma_page(d, gfn << PAGE_SHIFT_4K);
     if ( !pg )
@@ -1538,9 +1596,11 @@ int iommu_unmap_page(struct domain *d, d
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
 
+#ifdef CONTEXT_PASSTHRU
     /* do nothing if dom0 and iommu supports pass thru */
     if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
+#endif
 
     dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
 
@@ -1711,7 +1771,7 @@ void __init setup_dom0_devices(void)
                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 }
 
-void clear_fault_bit(struct iommu *iommu)
+void clear_fault_bits(struct iommu *iommu)
 {
     u64 val;
 
@@ -1722,13 +1782,15 @@ void clear_fault_bit(struct iommu *iommu
         iommu->reg,
         cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
         val);
-    dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
+    dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
 }
 
 static int init_vtd_hw(void)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
+    struct iommu_flush *flush = NULL;
+    int vector;
     int ret;
 
     for_each_drhd_unit ( drhd )
@@ -1740,29 +1802,37 @@ static int init_vtd_hw(void)
             gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
             return -EIO;
         }
-    }
-
-    return 0;
-}
-
-static int enable_vtd_translation(void)
-{
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
-    int vector = 0;
-
-    for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
+
         vector = iommu_set_interrupt(iommu);
         dma_msi_data_init(iommu, vector);
         dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
         iommu->vector = vector;
-        clear_fault_bit(iommu);
+        clear_fault_bits(iommu);
+        dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
+
+        /* initialize flush functions */
+        flush = iommu_get_flush(iommu);
+        flush->context = flush_context_reg;
+        flush->iotlb = flush_iotlb_reg;
+
+        if ( qinval_setup(iommu) != 0 )
+            dprintk(XENLOG_ERR VTDPREFIX,
+                    "Queued Invalidation hardware not found\n");
+    }
+    return 0;
+}
+
+static int enable_vtd_translation(void)
+{
+    struct acpi_drhd_unit *drhd;
+    struct iommu *iommu;
+
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
         if ( iommu_enable_translation(iommu) )
             return -EIO;
     }
-
     return 0;
 }
 
@@ -1792,9 +1862,6 @@ int iommu_setup(void)
 
     spin_lock_init(&domid_bitmap_lock);
     INIT_LIST_HEAD(&hd->pdev_list);
-
-    /* start from scratch */
-    iommu_flush_all();
 
     /* setup clflush size */
     x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
@@ -1815,12 +1882,12 @@ int iommu_setup(void)
     for ( i = 0; i < max_page; i++ )
         iommu_map_page(dom0, i, i);
 
+    enable_vtd_translation();
     if ( init_vtd_hw() )
         goto error;
     setup_dom0_devices();
     setup_dom0_rmrr();
-    if ( enable_vtd_translation() )
-        goto error;
+    iommu_flush_all();
 
     return 0;
 
diff -r bf534df13095 -r 6de1c481bc7f xen/arch/x86/hvm/vmx/vtd/qinval.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/qinval.c Tue Jan 22 11:24:28 2008 +0000
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx>
+ * Copyright (C) Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
+ */
+
+
+#include <xen/init.h>
+#include <xen/irq.h>
+#include <xen/spinlock.h>
+#include <xen/sched.h>
+#include <xen/xmalloc.h>
+#include <xen/domain_page.h>
+#include <asm/delay.h>
+#include <asm/string.h>
+#include <asm/iommu.h>
+#include <asm/hvm/vmx/intel-iommu.h>
+#include "dmar.h"
+#include "vtd.h"
+#include "pci-direct.h"
+#include "pci_regs.h"
+#include "msi.h"
+#include "extern.h"
+
+static void print_qi_regs(struct iommu *iommu)
+{
+    u64 val;
+
+    val = dmar_readq(iommu->reg, DMAR_IQA_REG);
+    printk("DMAR_IQA_REG = %"PRIx64"\n", val);
+
+    val = dmar_readq(iommu->reg, DMAR_IQH_REG);
+    printk("DMAR_IQH_REG = %"PRIx64"\n", val);
+
+    val = dmar_readq(iommu->reg, DMAR_IQT_REG);
+    printk("DMAR_IQT_REG = %"PRIx64"\n", val);
+}
+
+static int qinval_next_index(struct iommu *iommu)
+{
+    u64 val;
+    val = dmar_readq(iommu->reg, DMAR_IQT_REG);
+    return (val >> 4);
+}
+
+static int qinval_update_qtail(struct iommu *iommu, int index)
+{
+    u64 val;
+
+    /* Need an ASSERT to ensure that we hold the register lock */
+    val = (index < (QINVAL_ENTRY_NR-1)) ? (index + 1) : 0;
+    dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << 4));
+    return 0;
+}
+
+static int gen_cc_inv_dsc(struct iommu *iommu, int index,
+    u16 did, u16 source_id, u8 function_mask, u8 granu)
+{
+    u64 *ptr64;
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
+    qinval_entry->q.cc_inv_dsc.lo.granu = granu;
+    qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.cc_inv_dsc.lo.did = did;
+    qinval_entry->q.cc_inv_dsc.lo.sid = source_id;
+    qinval_entry->q.cc_inv_dsc.lo.fm = function_mask;
+    qinval_entry->q.cc_inv_dsc.lo.res_2 = 0;
+    qinval_entry->q.cc_inv_dsc.hi.res = 0;
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+
+    ptr64 = (u64 *)qinval_entry;
+    return 0;
+}
+
+int queue_invalidate_context(struct iommu *iommu,
+    u16 did, u16 source_id, u8 function_mask, u8 granu)
+{
+    int ret = -1;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    if (index == -1)
+        return -EBUSY;
+    ret = gen_cc_inv_dsc(iommu, index, did, source_id,
+                         function_mask, granu);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+static int gen_iotlb_inv_dsc(struct iommu *iommu, int index,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
+{
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
+    qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
+    qinval_entry->q.iotlb_inv_dsc.lo.dr = 0;
+    qinval_entry->q.iotlb_inv_dsc.lo.dw = 0;
+    qinval_entry->q.iotlb_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.iotlb_inv_dsc.lo.did = did;
+    qinval_entry->q.iotlb_inv_dsc.lo.res_2 = 0;
+
+    qinval_entry->q.iotlb_inv_dsc.hi.am = am;
+    qinval_entry->q.iotlb_inv_dsc.hi.ih = ih;
+    qinval_entry->q.iotlb_inv_dsc.hi.res_1 = 0;
+    qinval_entry->q.iotlb_inv_dsc.hi.addr = addr;
+
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+int queue_invalidate_iotlb(struct iommu *iommu,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
+{
+    int ret = -1;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+
+    index = qinval_next_index(iommu);
+    ret = gen_iotlb_inv_dsc(iommu, index, granu, dr, dw, did,
+                            am, ih, addr);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+static int gen_wait_dsc(struct iommu *iommu, int index,
+    u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
+{
+    u64 *ptr64;
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
+    qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
+    qinval_entry->q.inv_wait_dsc.lo.sw = sw;
+    qinval_entry->q.inv_wait_dsc.lo.fn = fn;
+    qinval_entry->q.inv_wait_dsc.lo.res_1 = 0;
+    qinval_entry->q.inv_wait_dsc.lo.sdata = sdata;
+    qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
+    qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(saddr) >> 2;
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    ptr64 = (u64 *)qinval_entry;
+    return 0;
+}
+
+static int queue_invalidate_wait(struct iommu *iommu,
+    u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
+{
+    unsigned long flags;
+    unsigned long start_time;
+    int index = -1;
+    int ret = -1;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    spin_lock_irqsave(&qi_ctrl->qinval_poll_lock, flags);
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    if (*saddr == 1)
+        *saddr = 0;
+    ret = gen_wait_dsc(iommu, index, iflag, sw, fn, sdata, saddr);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+    /* Now we don't support interrupt method */
+    if ( sw )
+    {
+        /* In case all wait descriptor writes to same addr with same data */
+        start_time = jiffies;
+        while ( *saddr != 1 ) {
+            if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) {
+                print_qi_regs(iommu);
+                panic("queue invalidate wait descriptor was not executed\n");
+            }
+            cpu_relax();
+        }
+    }
+    spin_unlock_irqrestore(&qi_ctrl->qinval_poll_lock, flags);
+    return ret;
+}
+
+int invalidate_sync(struct iommu *iommu)
+{
+    int ret = -1;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if (qi_ctrl->qinval)
+    {
+        ret = queue_invalidate_wait(iommu,
+            0, 1, 1, 1, &qi_ctrl->qinval_poll_status);
+        return ret;
+    }
+    return 0;
+}
+
+static int gen_dev_iotlb_inv_dsc(struct iommu *iommu, int index,
+    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
+{
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = sid;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;
+
+    qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
+    qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr;
+
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+int queue_invalidate_device_iotlb(struct iommu *iommu,
+    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
+{
+    int ret = -1;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    ret = gen_dev_iotlb_inv_dsc(iommu, index, max_invs_pend,
+                                sid, size, addr);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+static int gen_iec_inv_dsc(struct iommu *iommu, int index,
+    u8 granu, u8 im, u16 iidx)
+{
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
+    qinval_entry->q.iec_inv_dsc.lo.granu = granu;
+    qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.iec_inv_dsc.lo.im = im;
+    qinval_entry->q.iec_inv_dsc.lo.iidx = iidx;
+    qinval_entry->q.iec_inv_dsc.lo.res_2 = 0;
+    qinval_entry->q.iec_inv_dsc.hi.res = 0;
+
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+int queue_invalidate_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
+{
+    int ret;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    ret = gen_iec_inv_dsc(iommu, index, granu, im, iidx);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+u64 iec_cap;
+int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
+{
+    int ret;
+    ret = queue_invalidate_iec(iommu, granu, im, iidx);
+    ret |= invalidate_sync(iommu);
+
+    /*
+     * reading vt-d architecture register will ensure
+     * draining happens in implementation independent way.
+     */
+    iec_cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
+    return ret;
+}
+
+int iommu_flush_iec_global(struct iommu *iommu)
+{
+    return __iommu_flush_iec(iommu, IEC_GLOBAL_INVL, 0, 0);
+}
+
+int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx)
+{
+   return __iommu_flush_iec(iommu, IEC_INDEX_INVL, im, iidx);
+}
+
+static int flush_context_qi(
+    void *_iommu, u16 did, u16 sid, u8 fm, u64 type,
+    int non_present_entry_flush)
+{
+    int ret = 0;
+    struct iommu *iommu = (struct iommu *)_iommu;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    /*
+     * In the non-present entry flush case: if the hardware doesn't cache
+     * non-present entries we do nothing; if the hardware does cache
+     * non-present entries, we flush entries of domain 0 (the domain id
+     * used to cache any non-present entries).
+     */
+    if ( non_present_entry_flush )
+    {
+        if ( !cap_caching_mode(iommu->cap) )
+            return 1;
+        else
+            did = 0;
+    }
+
+    if (qi_ctrl->qinval)
+    {
+        ret = queue_invalidate_context(iommu, did, sid, fm,
+                                       type >> DMA_CCMD_INVL_GRANU_OFFSET);
+        ret |= invalidate_sync(iommu);
+    }
+    return ret;
+}
+
+static int flush_iotlb_qi(
+    void *_iommu, u16 did,
+    u64 addr, unsigned int size_order, u64 type,
+    int non_present_entry_flush)
+{
+    u8 dr = 0, dw = 0;
+    int ret = 0;
+    struct iommu *iommu = (struct iommu *)_iommu;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    /*
+     * In the non-present entry flush case: if the hardware doesn't cache
+     * non-present entries we do nothing; if the hardware does cache
+     * non-present entries, we flush entries of domain 0 (the domain id
+     * used to cache any non-present entries).
+     */
+    if ( non_present_entry_flush )
+    {
+        if ( !cap_caching_mode(iommu->cap) )
+            return 1;
+        else
+            did = 0;
+    }
+
+    if (qi_ctrl->qinval) {
+        /* use queued invalidation */
+        if (cap_write_drain(iommu->cap))
+            dw = 1;
+        if (cap_read_drain(iommu->cap))
+            dr = 1;
+        /* Need to consider the ih bit later */
+        ret = queue_invalidate_iotlb(iommu,
+                  (type >> DMA_TLB_FLUSH_GRANU_OFFSET), dr,
+                  dw, did, (u8)size_order, 0, addr);
+        ret |= invalidate_sync(iommu);
+    }
+    return ret;
+}
+
+int qinval_setup(struct iommu *iommu)
+{
+    unsigned long start_time;
+    u64 paddr;
+    u32 status = 0;
+    struct qi_ctrl *qi_ctrl;
+    struct iommu_flush *flush;
+
+    qi_ctrl = iommu_qi_ctrl(iommu);
+    flush = iommu_get_flush(iommu);
+
+    if ( !ecap_queued_inval(iommu->ecap) )
+        return -ENODEV;
+
+    if (qi_ctrl->qinval == NULL) {
+        qi_ctrl->qinval = alloc_xenheap_page();
+        if (qi_ctrl->qinval == NULL)
+            panic("Cannot allocate memory for qi_ctrl->qinval\n");
+        memset((u8*)qi_ctrl->qinval, 0, PAGE_SIZE_4K);
+        flush->context = flush_context_qi;
+        flush->iotlb = flush_iotlb_qi;
+    }
+    paddr = virt_to_maddr(qi_ctrl->qinval);
+
+    /* Set up the Invalidation Queue Address (IQA) register with the
+     * address of the page we just allocated.  The QS field at
+     * bits[2:0] indicates the queue size; leaving it 0 means the
+     * queue is one 4KB page, i.e. 256 entries.  The Queue Head (IQH)
+     * and Queue Tail (IQT) registers are automatically reset to 0
+     * by a write to the IQA register.
+     */
+    dmar_writeq(iommu->reg, DMAR_IQA_REG, paddr);
+
+    /* enable queued invalidation hardware */
+    iommu->gcmd |= DMA_GCMD_QIE;
+    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+    /* Make sure hardware complete it */
+    start_time = jiffies;
+    while (1) {
+        status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+        if (status & DMA_GSTS_QIES)
+            break;
+        if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+            panic("Cannot set QIE field for queue invalidation\n");
+        cpu_relax();
+    }
+    status = 0;
+    return status;
+}
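
A note on the queue geometry assumed above: with the QS field of
DMAR_IQA_REG left at zero the invalidation queue is a single 4KB page, and
each descriptor is 16 bytes (128 bits, per the VT-d specification rather
than anything in this hunk), which gives the 256 entries mentioned in
qinval_setup().  The tail index is written into bits [18:4] of
DMAR_IQT_REG, which is what the shifts by 4 in qinval_next_index() and
qinval_update_qtail() encode.  Below is a small self-contained check of
that arithmetic; QINVAL_ENTRY_SIZE is an illustrative name, not one
defined by the patch.

/* Sanity check of the queue sizing assumed by qinval_setup(). */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE_4K       4096
#define QINVAL_ENTRY_SIZE  16      /* 128-bit descriptor, per the VT-d spec */
#define QINVAL_ENTRY_NR    (PAGE_SIZE_4K / QINVAL_ENTRY_SIZE)

int main(void)
{
    assert(QINVAL_ENTRY_NR == 256);  /* matches the comment in qinval_setup() */

    /* qinval_update_qtail() writes (index + 1) << 4 to DMAR_IQT_REG:
     * shifting by 4 turns a slot index into the register's tail field. */
    int index = QINVAL_ENTRY_NR - 1;
    unsigned long tail = ((index < QINVAL_ENTRY_NR - 1) ? index + 1 : 0) << 4;
    printf("tail value after slot %d: %lu (wraps to the start)\n", index, tail);
    return 0;
}
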
diff -r bf534df13095 -r 6de1c481bc7f xen/arch/x86/hvm/vmx/vtd/vtd.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/vtd.h    Tue Jan 22 11:24:28 2008 +0000
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx>
+ * Copyright (C) Weidong Han <weidong.han@xxxxxxxxx>
+ */
+
+#ifndef _VTD_H_
+#define _VTD_H_
+
+#include <xen/list.h>
+#include <asm/iommu.h>
+
+#define VTDPREFIX "[VT-D]" 
+
+#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
+#define time_after(a,b)         \
+        (typecheck(unsigned long, a) && \
+         typecheck(unsigned long, b) && \
+         ((long)(b) - (long)(a) < 0))
+
+struct IO_APIC_route_remap_entry {
+    union {
+        u64 val;
+        struct {
+            u64 vector:8,
+            delivery_mode:3,
+            index_15:1,
+            delivery_status:1,
+            polarity:1,
+            irr:1,
+            trigger:1,
+            mask:1,
+            reserved:31,
+            format:1,
+            index_0_14:15;
+        };
+    };
+};
+
+#endif // _VTD_H_
diff -r bf534df13095 -r 6de1c481bc7f xen/include/asm-x86/iommu.h
--- a/xen/include/asm-x86/iommu.h       Tue Jan 22 11:23:33 2008 +0000
+++ b/xen/include/asm-x86/iommu.h       Tue Jan 22 11:24:28 2008 +0000
@@ -31,6 +31,9 @@ extern int vtd_enabled;
 
 #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
 #define domain_vmx_iommu(d)     (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
+#define iommu_qi_ctrl(iommu)    (&(iommu->intel.qi_ctrl))
+#define iommu_ir_ctrl(iommu)    (&(iommu->intel.ir_ctrl))
+#define iommu_get_flush(iommu)  (&(iommu->intel.flush))
 
 /*
  * The PCI interface treats multi-function devices as independent
@@ -61,6 +64,7 @@ struct iommu {
     spinlock_t register_lock; /* protect iommu register handling */
     struct root_entry *root_entry; /* virtual address */
     unsigned int vector;
+    struct intel_iommu intel;
 };
 
 int iommu_setup(void);
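
One policy shared by the new flush_context_qi()/flush_iotlb_qi() and the
pre-existing register paths is the handling of non-present-entry flushes:
when the hardware does not cache not-present entries (caching mode off)
there is nothing to invalidate, and the function returns 1 so that, per
the "return value determine if we need a write buffer flush" comment in
intel-iommu.c, the caller falls back to a write-buffer flush; otherwise
domain id 0, the id used to cache not-present entries, is flushed.  A
reduced sketch of just that decision follows; the function name and
signature are illustrative, not taken from the patch.

/* Sketch of the non-present-entry flush policy used by the flush_*_qi()
 * and flush_*_reg() helpers. */
#include <stdbool.h>
#include <stdio.h>

static int flush_non_present(bool caching_mode, unsigned short *did)
{
    if ( !caching_mode )
        return 1;      /* nothing cached: caller does a write-buffer flush */
    *did = 0;          /* flush the domain id used for not-present caching */
    return 0;          /* then fall through to the real invalidation */
}

int main(void)
{
    unsigned short did = 7;
    printf("caching mode off -> %d\n", flush_non_present(false, &did));
    printf("caching mode on  -> %d (did now %u)\n",
           flush_non_present(true, &did), (unsigned)did);
    return 0;
}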

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
