# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1255946075 -3600
# Node ID 7e02a2cd1618240f64b83a89669f0fcfdc6cac2f
# Parent dcc5d5d954e942efd9a143ff3d5769e82178ecad
[IOMMU] clean interrupt remapping and queued invalidation
This patch enlarges the interrupt remapping table to fix out-of-range
table accesses when using many multi-function PCI devices.
The invalidation queue is also expanded.
Signed-off-by: Zhai Edwin <edwin.zhai@xxxxxxxxx>
Signed-off-by: Cui Dexuan <dexuan.cui@xxxxxxxxx>
---
xen/drivers/passthrough/vtd/intremap.c | 34 ++++++++++++------
xen/drivers/passthrough/vtd/iommu.c | 22 +++++++----
xen/drivers/passthrough/vtd/iommu.h | 41 ++++++++++++++++-----
xen/drivers/passthrough/vtd/qinval.c | 62 ++++++++++++++++-----------------
xen/drivers/passthrough/vtd/utils.c | 21 ++++++++---
5 files changed, 115 insertions(+), 65 deletions(-)
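The core of the change is that the remapping table now spans multiple 4K pages,
so an IRTE index has to be split into a page number and an in-page offset before
the page is mapped. The standalone C sketch below (not part of the patch; the
table base address and index are illustrative assumptions, and the constants
mirror the 4K-page case) shows the arithmetic the new IREMAP_ENTRY_ORDER /
PAGE_SHIFT expressions in the hunks perform:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT          12                /* assume 4K CPU pages (x86) */
#define IREMAP_ENTRY_ORDER  (PAGE_SHIFT - 4)  /* 16-byte IRTEs => 256 per 4K page */
#define IREMAP_PAGE_ORDER   8                 /* 256 table pages => 64K IRTEs */
#define IREMAP_ENTRY_NR     (1 << (IREMAP_PAGE_ORDER + 8))

int main(void)
{
    uint64_t iremap_maddr = 0x12345000ULL;    /* hypothetical table base */
    int index = 1000;                         /* hypothetical IRTE index */

    /* Which 4K page of the table holds this entry, and where within it. */
    uint64_t entry_base = iremap_maddr +
        ((uint64_t)(index >> IREMAP_ENTRY_ORDER) << PAGE_SHIFT);
    int offset = index % (1 << IREMAP_ENTRY_ORDER);

    printf("table entries: %d\n", IREMAP_ENTRY_NR);
    printf("index %d -> page base %#llx, offset %d\n",
           index, (unsigned long long)entry_base, offset);
    return 0;
}

With IREMAP_PAGE_ORDER set to 8, the table grows from 256 entries in a single
4K page to 65536 entries spread over 256 pages, which matches the
IRTA_REG_TABLE_SIZE = IREMAP_PAGE_ORDER + 7 value programmed into the hardware
(2^(IRTA_REG_TABLE_SIZE + 1) = IREMAP_ENTRY_NR).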
diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/intremap.c Mon Oct 19 10:54:35 2009 +0100
@@ -146,6 +146,7 @@ static int remap_entry_to_ioapic_rte(
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+ u64 entry_base;
if ( ir_ctrl == NULL )
{
@@ -164,9 +165,11 @@ static int remap_entry_to_ioapic_rte(
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+ entry_base = ir_ctrl->iremap_maddr +
+ (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
- iremap_entry = &iremap_entries[index];
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
old_rte->vector = iremap_entry->lo.vector;
old_rte->delivery_mode = iremap_entry->lo.dlm;
@@ -192,6 +195,7 @@ static int ioapic_rte_to_remap_entry(str
int index;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+ u64 entry_base;
remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
@@ -208,15 +212,17 @@ static int ioapic_rte_to_remap_entry(str
{
dprintk(XENLOG_ERR VTDPREFIX,
"%s: intremap index (%d) is larger than"
- " the maximum index (%ld)!\n",
+ " the maximum index (%d)!\n",
__func__, index, IREMAP_ENTRY_NR - 1);
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return -EFAULT;
}
+ entry_base = ir_ctrl->iremap_maddr +
+ (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
- iremap_entry = &iremap_entries[index];
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
@@ -425,6 +431,7 @@ static int remap_entry_to_msi_msg(
int index;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+ u64 entry_base;
if ( ir_ctrl == NULL )
{
@@ -447,9 +454,11 @@ static int remap_entry_to_msi_msg(
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+ entry_base = ir_ctrl->iremap_maddr +
+ (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
- iremap_entry = &iremap_entries[index];
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo =
@@ -485,6 +494,7 @@ static int msi_msg_to_remap_entry(
int index;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+ u64 entry_base;
remap_rte = (struct msi_msg_remap_entry *) msg;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
@@ -502,16 +512,18 @@ static int msi_msg_to_remap_entry(
{
dprintk(XENLOG_ERR VTDPREFIX,
"%s: intremap index (%d) is larger than"
- " the maximum index (%ld)!\n",
+ " the maximum index (%d)!\n",
__func__, index, IREMAP_ENTRY_NR - 1);
msi_desc->remap_index = -1;
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return -EFAULT;
}
+ entry_base = ir_ctrl->iremap_maddr +
+ (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
- iremap_entry = &iremap_entries[index];
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
/* Set interrupt remapping table entry */
@@ -619,7 +631,7 @@ int enable_intremap(struct iommu *iommu)
if ( ir_ctrl->iremap_maddr == 0 )
{
drhd = iommu_to_drhd(iommu);
- ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, 1);
+ ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR);
if ( ir_ctrl->iremap_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c Mon Oct 19 10:54:35 2009 +0100
@@ -135,16 +135,16 @@ void iommu_flush_cache_entry(void *addr)
void iommu_flush_cache_page(void *addr, unsigned long npages)
{
- __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
+ __iommu_flush_cache(addr, PAGE_SIZE * npages);
}
/* Allocate page table, return its machine address */
u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
{
struct acpi_rhsa_unit *rhsa;
- struct page_info *pg;
+ struct page_info *pg, *cur_pg;
u64 *vaddr;
- int node = -1;
+ int node = -1, i;
rhsa = drhd_to_rhsa(drhd);
if ( rhsa )
@@ -154,11 +154,17 @@ u64 alloc_pgtable_maddr(struct acpi_drhd
(node == -1 ) ? 0 : MEMF_node(node));
if ( !pg )
return 0;
- vaddr = __map_domain_page(pg);
- memset(vaddr, 0, PAGE_SIZE * npages);
-
- iommu_flush_cache_page(vaddr, npages);
- unmap_domain_page(vaddr);
+
+ cur_pg = pg;
+ for ( i = 0; i < npages; i++ )
+ {
+ vaddr = __map_domain_page(cur_pg);
+ memset(vaddr, 0, PAGE_SIZE);
+
+ iommu_flush_cache_page(vaddr, 1);
+ unmap_domain_page(vaddr);
+ cur_pg++;
+ }
return page_to_maddr(pg);
}
diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h Mon Oct 19 10:54:35 2009 +0100
@@ -302,7 +302,23 @@ struct iremap_entry {
}hi;
};
};
-#define IREMAP_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct iremap_entry))
+
+/* Max intr remapping table page order is 8, as max number of IRTEs is 64K */
+#define IREMAP_PAGE_ORDER 8
+
+/*
+ * VTd engine handles 4K page, while CPU may have different page size on
+ * different arch. E.g. 16K on IPF.
+ */
+#define IREMAP_ARCH_PAGE_ORDER (IREMAP_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
+#define IREMAP_ARCH_PAGE_NR ( IREMAP_ARCH_PAGE_ORDER < 0 ? \
+ 1 : \
+ 1 << IREMAP_ARCH_PAGE_ORDER )
+
+/* Each entry is 16 bytes, so 2^8 entries per 4K page */
+#define IREMAP_ENTRY_ORDER ( PAGE_SHIFT - 4 )
+#define IREMAP_ENTRY_NR ( 1 << ( IREMAP_PAGE_ORDER + 8 ) )
+
#define iremap_present(v) ((v).lo & 1)
#define iremap_fault_disable(v) (((v).lo >> 1) & 1)
@@ -392,12 +408,17 @@ struct qinval_entry {
}q;
};
-/* Order of queue invalidation pages */
-#define IQA_REG_QS 0
-#define NUM_QINVAL_PAGES (1 << IQA_REG_QS)
-
-/* Each entry is 16 byte */
-#define QINVAL_ENTRY_NR (1 << (IQA_REG_QS + 8))
+/* Order of queue invalidation pages (max is 8) */
+#define QINVAL_PAGE_ORDER 2
+
+#define QINVAL_ARCH_PAGE_ORDER (QINVAL_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
+#define QINVAL_ARCH_PAGE_NR ( QINVAL_ARCH_PAGE_ORDER < 0 ? \
+ 1 : \
+ 1 << QINVAL_ARCH_PAGE_ORDER )
+
+/* Each entry is 16 bytes, so 2^8 entries per page */
+#define QINVAL_ENTRY_ORDER ( PAGE_SHIFT - 4 )
+#define QINVAL_ENTRY_NR (1 << (QINVAL_PAGE_ORDER + 8))
/* Status data flag */
#define QINVAL_STAT_INIT 0
@@ -429,9 +450,9 @@ struct qinval_entry {
#define IEC_GLOBAL_INVL 0
#define IEC_INDEX_INVL 1
#define IRTA_REG_EIME_SHIFT 11
-#define IRTA_REG_TABLE_SIZE 7 // 4k page = 256 * 16 byte entries
- // 2^^(IRTA_REG_TABLE_SIZE + 1) = 256
- // IRTA_REG_TABLE_SIZE = 7
+
+/* 2^(IRTA_REG_TABLE_SIZE + 1) = IREMAP_ENTRY_NR */
+#define IRTA_REG_TABLE_SIZE ( IREMAP_PAGE_ORDER + 7 )
#define VTD_PAGE_TABLE_LEVEL_3 3
#define VTD_PAGE_TABLE_LEVEL_4 4
diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/qinval.c Mon Oct 19 10:54:35 2009 +0100
@@ -45,17 +45,15 @@ static void print_qi_regs(struct iommu *
static int qinval_next_index(struct iommu *iommu)
{
- u64 tail, head;
+ u64 tail;
tail = dmar_readq(iommu->reg, DMAR_IQT_REG);
tail >>= QINVAL_INDEX_SHIFT;
- head = dmar_readq(iommu->reg, DMAR_IQH_REG);
- head >>= QINVAL_INDEX_SHIFT;
-
- /* round wrap check */
- if ( ( tail + 1 ) % QINVAL_ENTRY_NR == head )
- return -1;
+ /* (tail+1 == head) indicates a full queue, wait for HW */
+ while ( ( tail + 1 ) % QINVAL_ENTRY_NR ==
+ ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
+ cpu_relax();
return tail;
}
@@ -77,11 +75,13 @@ static int gen_cc_inv_dsc(struct iommu *
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
qinval_entry->q.cc_inv_dsc.lo.granu = granu;
qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
@@ -121,14 +121,14 @@ static int gen_iotlb_inv_dsc(struct iomm
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
- if ( index == -1 )
- return -1;
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
+
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
qinval_entry->q.iotlb_inv_dsc.lo.dr = dr;
@@ -172,13 +172,13 @@ static int gen_wait_dsc(struct iommu *io
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
- if ( index == -1 )
- return -1;
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
+
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
qinval_entry->q.inv_wait_dsc.lo.sw = sw;
@@ -247,14 +247,14 @@ static int gen_dev_iotlb_inv_dsc(struct
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
- if ( index == -1 )
- return -1;
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
+
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
@@ -295,14 +295,14 @@ static int gen_iec_inv_dsc(struct iommu
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
- if ( index == -1 )
- return -1;
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
+
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
qinval_entry->q.iec_inv_dsc.lo.granu = granu;
qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
@@ -445,7 +445,7 @@ int enable_qinval(struct iommu *iommu)
if ( qi_ctrl->qinval_maddr == 0 )
{
drhd = iommu_to_drhd(iommu);
- qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, NUM_QINVAL_PAGES);
+ qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, QINVAL_ARCH_PAGE_NR);
if ( qi_ctrl->qinval_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
@@ -464,7 +464,7 @@ int enable_qinval(struct iommu *iommu)
* registers are automatically reset to 0 with write
* to IQA register.
*/
- qi_ctrl->qinval_maddr |= IQA_REG_QS;
+ qi_ctrl->qinval_maddr |= QINVAL_PAGE_ORDER;
spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/utils.c Mon Oct 19 10:54:35 2009 +0100
@@ -226,8 +226,7 @@ static void dump_iommu_info(unsigned cha
/* Dump interrupt remapping table. */
u64 iremap_maddr = dmar_readq(iommu->reg, DMAR_IRTA_REG);
int nr_entry = 1 << ((iremap_maddr & 0xF) + 1);
- struct iremap_entry *iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(iremap_maddr);
+ struct iremap_entry *iremap_entries = NULL;
printk(" Interrupt remapping table (nr_entry=0x%x. "
"Only dump P=1 entries here):\n", nr_entry);
@@ -235,7 +234,18 @@ static void dump_iommu_info(unsigned cha
"FPD P\n");
for ( i = 0; i < nr_entry; i++ )
{
- struct iremap_entry *p = iremap_entries + i;
+ struct iremap_entry *p;
+ if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
+ {
+ /* This entry crosses a page boundary */
+ u64 entry_base = iremap_maddr +
+ (( i >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
+ if ( iremap_entries )
+ unmap_vtd_domain_page(iremap_entries);
+ iremap_entries =
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ }
+ p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
if ( !p->lo.p )
continue;
@@ -246,8 +256,9 @@ static void dump_iommu_info(unsigned cha
(u32)p->lo.dlm, (u32)p->lo.tm, (u32)p->lo.rh,
(u32)p->lo.dm, (u32)p->lo.fpd, (u32)p->lo.p);
}
-
- unmap_vtd_domain_page(iremap_entries);
+ if ( iremap_entries )
+ unmap_vtd_domain_page(iremap_entries);
+
}
}