diff -r 05e36e506c09 xen/arch/x86/hvm/vmx/vtd/Makefile --- a/xen/arch/x86/hvm/vmx/vtd/Makefile Wed Jan 23 18:12:37 2008 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/Makefile Wed Jan 23 17:11:45 2008 -0800 @@ -3,3 +3,4 @@ obj-y += utils.o obj-y += utils.o obj-y += io.o obj-y += qinval.o +obj-y += intremap.o diff -r 05e36e506c09 xen/arch/x86/hvm/vmx/vtd/extern.h --- a/xen/arch/x86/hvm/vmx/vtd/extern.h Wed Jan 23 18:12:37 2008 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/extern.h Wed Jan 23 17:11:45 2008 -0800 @@ -23,10 +23,6 @@ #include "dmar.h" -extern int iommu_setup_done; -extern int vtd2_thurley_enabled; -extern int vtd2_qinval_enabled; - extern spinlock_t ioapic_lock; extern struct qi_ctrl *qi_ctrl; extern struct ir_ctrl *ir_ctrl; @@ -36,6 +32,7 @@ void print_vtd_entries(struct domain *d, int bus, int devfn, unsigned long gmfn); int qinval_setup(struct iommu *iommu); +int intremap_setup(struct iommu *iommu); int queue_invalidate_context(struct iommu *iommu, u16 did, u16 source_id, u8 function_mask, u8 granu); int queue_invalidate_iotlb(struct iommu *iommu, diff -r 05e36e506c09 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c --- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c Wed Jan 23 18:12:37 2008 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c Wed Jan 23 17:11:45 2008 -0800 @@ -1815,9 +1815,13 @@ static int init_vtd_hw(void) flush->context = flush_context_reg; flush->iotlb = flush_iotlb_reg; - if ( qinval_setup(iommu) != 0); + if ( qinval_setup(iommu) != 0 ) dprintk(XENLOG_ERR VTDPREFIX, "Queued Invalidation hardware not found\n"); + + if ( intremap_setup(iommu) != 0 ) + dprintk(XENLOG_ERR VTDPREFIX, + "Interrupt Remapping hardware not found\n"); } return 0; } diff -r 05e36e506c09 xen/arch/x86/hvm/vmx/vtd/io.c --- a/xen/arch/x86/hvm/vmx/vtd/io.c Wed Jan 23 18:12:37 2008 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/io.c Wed Jan 23 17:11:45 2008 -0800 @@ -44,6 +44,7 @@ #include #include #include +#include "extern.h" static void pt_irq_time_out(void *data) { @@ -130,6 +131,9 @@ int 
pt_irq_create_bind_vtd( /* Deal with gsi for legacy devices */ pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE); } + + /* remap the interrupt if interrupt remapping HW is available */ + gsi_remapping(machine_gsi); gdprintk(XENLOG_INFO VTDPREFIX, "VT-d irq bind: m_irq = %x device = %x intx = %x\n", diff -r 05e36e506c09 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Wed Jan 23 18:12:37 2008 +0000 +++ b/xen/arch/x86/io_apic.c Wed Jan 23 17:11:45 2008 -0800 @@ -44,7 +44,7 @@ atomic_t irq_mis_count; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -static DEFINE_SPINLOCK(ioapic_lock); +DEFINE_SPINLOCK(ioapic_lock); static DEFINE_SPINLOCK(vector_lock); int skip_ioapic_setup; diff -r 05e36e506c09 xen/arch/x86/mpparse.c --- a/xen/arch/x86/mpparse.c Wed Jan 23 18:12:37 2008 +0000 +++ b/xen/arch/x86/mpparse.c Wed Jan 23 17:11:45 2008 -0800 @@ -860,12 +860,7 @@ void __devinit mp_register_lapic ( #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 -static struct mp_ioapic_routing { - int apic_id; - int gsi_base; - int gsi_end; - u32 pin_programmed[4]; -} mp_ioapic_routing[MAX_IO_APICS]; +struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; static int mp_find_ioapic ( diff -r 05e36e506c09 xen/include/asm-x86/mpspec_def.h --- a/xen/include/asm-x86/mpspec_def.h Wed Jan 23 18:12:37 2008 +0000 +++ b/xen/include/asm-x86/mpspec_def.h Wed Jan 23 17:11:45 2008 -0800 @@ -184,5 +184,13 @@ enum mp_bustype { MP_BUS_MCA, MP_BUS_NEC98 }; + +struct mp_ioapic_routing { + int apic_id; + int gsi_base; + int gsi_end; + u32 pin_programmed[4]; +}; + #endif diff -r 05e36e506c09 xen/arch/x86/hvm/vmx/vtd/intremap.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/intremap.c Wed Jan 23 17:11:45 2008 -0800 @@ -0,0 +1,438 @@ +/* + * Copyright (c) 2006, Intel Corporation. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay
+ * Copyright (C) Xiaohui Xin
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "dmar.h"
+#include "vtd.h"
+#include "pci-direct.h"
+#include "pci_regs.h"
+#include "msi.h"
+#include "extern.h"
+
+extern struct mp_ioapic_routing mp_ioapic_routing[];
+
+/* Functions for interrupt remapping */
+
+/*
+ * Return the ioapic.info value recorded for the IO-APIC with the given
+ * apic_id, found by walking the ioapic_list of the DRHD unit that
+ * ioapic_to_drhd() reports for it. The result is used as the source-id
+ * of the interrupt remapping entry.
+ *
+ * Returns 0 (after logging an error) when no matching entry exists,
+ * including when ioapic_to_drhd() yields no unit at all.
+ */
+u16 apicid_to_bdf(int apic_id)
+{
+    struct acpi_drhd_unit *drhd = ioapic_to_drhd(apic_id);
+    struct acpi_ioapic_unit *acpi_ioapic_unit;
+
+    /* Without a DRHD unit there is no list to walk. */
+    if ( drhd != NULL )
+    {
+        list_for_each_entry(acpi_ioapic_unit, &drhd->ioapic_list, list)
+        {
+            if ( acpi_ioapic_unit->apic_id == apic_id )
+                return acpi_ioapic_unit->ioapic.info;
+        }
+    }
+    dprintk(XENLOG_ERR VTDPREFIX, "Didn't find the bdf for the apic_id!\n");
+    return 0;
+}
+
+/*
+ * Convert a remap-format IO-APIC RTE back into the ordinary RTE layout.
+ *
+ * old_rte is reinterpreted as a remap-format entry to extract the remap
+ * table index; the vector, delivery mode, destination mode, trigger mode
+ * and destination stored in that table entry are then written back into
+ * old_rte in place.
+ *
+ * old_rte is left untouched (and an error is logged) when the iommu has no
+ * interrupt remapping control block or when the index does not refer to an
+ * entry that has been allocated.
+ */
+static void remap_entry_to_ioapic_rte(
+    struct iommu *iommu, struct IO_APIC_route_entry *old_rte)
+{
+    struct iremap_entry *iremap_entry = NULL;
+    struct IO_APIC_route_remap_entry *remap_rte;
+    unsigned int index;
+    unsigned long flags;
+    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+
+    if ( ir_ctrl == NULL )
+    {
+        dprintk(XENLOG_ERR VTDPREFIX,
+                "remap_entry_to_ioapic_rte: ir_ctl == NULL\n");
+        /* Nothing below can work without ir_ctrl; do not dereference it. */
+        return;
+    }
+
+    remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
+    index = (remap_rte->index_15 << 15) + remap_rte->index_0_14;
+
+    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+
+    /*
+     * iremap_index is the next free slot (ioapic_rte_to_remap_entry() uses
+     * it and then increments it), so only indices strictly below it refer
+     * to entries that have been programmed.
+     */
+    if ( index >= ir_ctrl->iremap_index )
+    {
+        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "Index is larger than remap table entry size. Error!\n");
+        return;
+    }
+
+    iremap_entry = &ir_ctrl->iremap[index];
+
+    old_rte->vector = iremap_entry->lo.vector;
+    old_rte->delivery_mode = iremap_entry->lo.dlm;
+    old_rte->dest_mode = iremap_entry->lo.dm;
+    old_rte->trigger = iremap_entry->lo.tm;
+    old_rte->__reserved_2 = 0;
+    old_rte->dest.logical.__reserved_1 = 0;
+    /* Undo the "<< 8" applied when the entry was built. */
+    old_rte->dest.logical.logical_dest = iremap_entry->lo.dst >> 8;
+
+    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+    return;
+}
+
+/*
+ * Allocate the next interrupt remapping table entry, fill it from the
+ * ordinary-format RTE in old_rte, and rewrite old_rte in place into the
+ * remap format that refers to the new entry.
+ *
+ * apic_id selects the source-id stored in the entry (via apicid_to_bdf()).
+ * The rewritten RTE is always left masked (mask = 1).
+ *
+ * If the table is full an error is logged and old_rte is left unchanged.
+ */
+static void ioapic_rte_to_remap_entry(struct iommu *iommu,
+    int apic_id, struct IO_APIC_route_entry *old_rte)
+{
+    struct iremap_entry *iremap_entry = NULL;
+    struct IO_APIC_route_remap_entry *remap_rte;
+    unsigned int index;
+    unsigned long flags;
+    int ret = 0;
+    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+
+    /* remap_rte aliases old_rte: the statement order below matters. */
+    remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
+    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+    index = ir_ctrl->iremap_index;
+    if ( index > IREMAP_ENTRY_NR - 1 )
+    {
+        dprintk(XENLOG_ERR VTDPREFIX,
+               "The interrupt number is more than 256!\n");
+        goto out;
+    }
+
+    iremap_entry = &(ir_ctrl->iremap[index]);
+    if ( *(u64 *)iremap_entry != 0 )
+    {
+        dprintk(XENLOG_WARNING VTDPREFIX,
+               "Interrupt remapping entry is inused already!\n");
+    }
+    iremap_entry->lo.fpd = 0;
+    iremap_entry->lo.dm = old_rte->dest_mode;
+    iremap_entry->lo.rh = 0;
+    iremap_entry->lo.tm = old_rte->trigger;
+    iremap_entry->lo.dlm = old_rte->delivery_mode;
+    iremap_entry->lo.avail = 0;
+    iremap_entry->lo.res_1 = 0;
+    iremap_entry->lo.vector = old_rte->vector;
+    iremap_entry->lo.res_2 = 0;
+    iremap_entry->lo.dst = (old_rte->dest.logical.logical_dest << 8);
+    iremap_entry->hi.sid = apicid_to_bdf(apic_id);
+    iremap_entry->hi.sq = 0;    /* comparing all 16-bit of SID */
+    iremap_entry->hi.svt = 0;   /* turn on requestor ID verification SID/SQ */
+    iremap_entry->hi.res_1 = 0;
+    iremap_entry->lo.p = 1;    /* finally, set present bit */
+    ir_ctrl->iremap_index++;
+
+    iommu_flush_iec_index(iommu, 0, index);
+    ret = invalidate_sync(iommu);
+    /* NOTE(review): assumes a non-zero return means failure - confirm. */
+    if ( ret != 0 )
+        dprintk(XENLOG_WARNING VTDPREFIX,
+                "ioapic_rte_to_remap_entry: invalidate_sync failed\n");
+
+    /* now construct new ioapic rte entry */
+    remap_rte->vector = old_rte->vector;
+    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
+    /*
+     * index_15 holds bit 15 of the index (the reader rebuilds the index as
+     * (index_15 << 15) + index_0_14), so shift the bit down before storing.
+     */
+    remap_rte->index_15 = (index >> 15) & 0x1;
+    remap_rte->index_0_14 = index & 0x7fff;
+    remap_rte->delivery_status = old_rte->delivery_status;
+    remap_rte->polarity = old_rte->polarity;
+    remap_rte->irr = old_rte->irr;
+    remap_rte->trigger = old_rte->trigger;
+    remap_rte->mask = 1;
+    remap_rte->reserved = 0;
+    remap_rte->format = 1;    /* indicate remap format */
+out:
+    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+    return;
+}
+
+/*
+ * Translate a GSI into the pin number on the IO-APIC whose
+ * [gsi_base, gsi_end] range contains it.
+ *
+ * NOTE: when no IO-APIC covers the GSI this returns -EINVAL converted to
+ * unsigned int, i.e. a very large value rather than a negative one.
+ * Callers should validate the GSI with gsi_to_ioapic() first.
+ */
+unsigned int gsi_to_ioapic_pin(unsigned int gsi)
+{
+    int apic = 0;
+
+    for ( apic = 0; apic < nr_ioapics; apic++ )
+    {
+        if ((gsi >= mp_ioapic_routing[apic].gsi_base)
+            && (gsi <= mp_ioapic_routing[apic].gsi_end))
+            return (gsi - mp_ioapic_routing[apic].gsi_base);
+    }
+    return -EINVAL;
+}
+
+/*
+ * Return the index (into mp_ioapic_routing[]) of the IO-APIC whose
+ * [gsi_base, gsi_end] range contains the GSI, or -EINVAL if none does.
+ */
+int gsi_to_ioapic(unsigned int gsi)
+{
+    int apic = 0;
+
+    for ( apic = 0; apic < nr_ioapics; apic++ )
+        if ((gsi >= mp_ioapic_routing[apic].gsi_base)
+            && (gsi <= mp_ioapic_routing[apic].gsi_end))
+            return apic;
+    return -EINVAL;
+}
+
+/*
+ * Read one 32-bit IO-APIC register, presenting remapped RTEs in the
+ * ordinary RTE format.
+ *
+ * An odd reg selects the upper half of an entry, an even reg the lower
+ * half. The raw register is returned when no remap table is available for
+ * this IO-APIC, or when the entry is masked or not in remap format.
+ * Otherwise both halves are read, converted through the remap table, and
+ * the requested half of the converted entry is returned.
+ */
+unsigned int
+io_apic_read_remap_rte(
+    unsigned int apic, unsigned int reg)
+{
+    struct IO_APIC_route_entry old_rte = { 0 };
+    struct IO_APIC_route_remap_entry *remap_rte;
+    int rte_upper = (reg & 1) ? 1 : 0;
+    struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
+    /* Only derive ir_ctrl from a valid iommu. */
+    struct ir_ctrl *ir_ctrl = iommu ? iommu_ir_ctrl(iommu) : NULL;
+
+    if ( !ir_ctrl || !(ir_ctrl->iremap) )
+    {
+        *IO_APIC_BASE(apic) = reg;
+        return *(IO_APIC_BASE(apic)+4);
+    }
+
+    if (rte_upper)
+        reg--;
+
+    /* read lower and upper 32-bits of rte entry */
+    *IO_APIC_BASE(apic) = reg;
+    *(((u32 *)&old_rte) + 0) = *(IO_APIC_BASE(apic)+4);
+    *IO_APIC_BASE(apic) = reg + 1;
+    *(((u32 *)&old_rte) + 1) = *(IO_APIC_BASE(apic)+4);
+
+    remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
+
+    if ( remap_rte->mask || (remap_rte->format == 0) )
+    {
+        *IO_APIC_BASE(apic) = reg;
+        return *(IO_APIC_BASE(apic)+4);
+    }
+
+    remap_entry_to_ioapic_rte(iommu, &old_rte);
+    if (rte_upper) {
+        *IO_APIC_BASE(apic) = reg + 1;
+        return ( *(((u32 *)&old_rte) + 1) );
+    } else {
+        *IO_APIC_BASE(apic) = reg;
+        return ( *(((u32 *)&old_rte) + 0) );
+    }
+}
+
+/*
+ * Write one 32-bit IO-APIC register, converting the entry to remap format
+ * when appropriate.
+ *
+ * The value is written straight through when no remap table is available
+ * for this IO-APIC, or when the entry currently in the IO-APIC is masked
+ * or not in remap format. Otherwise the current entry is read, the
+ * requested half is replaced by value, a remap table entry is built from
+ * the result, and both halves of the rewritten entry are written back.
+ */
+void
+io_apic_write_remap_rte(
+    unsigned int apic, unsigned int reg, unsigned int value)
+{
+    struct IO_APIC_route_entry old_rte = { 0 };
+    struct IO_APIC_route_remap_entry *remap_rte;
+    int rte_upper = (reg & 1) ? 1 : 0;
+    struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
+    /* Only derive ir_ctrl from a valid iommu. */
+    struct ir_ctrl *ir_ctrl = iommu ? iommu_ir_ctrl(iommu) : NULL;
+
+    if ( !ir_ctrl || !(ir_ctrl->iremap) )
+    {
+        *IO_APIC_BASE(apic) = reg;
+        *(IO_APIC_BASE(apic)+4) = value;
+        return;
+    }
+
+    if (rte_upper)
+        reg--;
+
+    /* read both lower and upper 32-bits of rte entry */
+    *IO_APIC_BASE(apic) = reg;
+    *(((u32 *)&old_rte) + 0) = *(IO_APIC_BASE(apic)+4);
+    *IO_APIC_BASE(apic) = reg + 1;
+    *(((u32 *)&old_rte) + 1) = *(IO_APIC_BASE(apic)+4);
+
+    remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
+    if ( remap_rte->mask || (remap_rte->format == 0) )
+    {
+        /* reg was rounded down above; restore it for an upper-half write */
+        *IO_APIC_BASE(apic) = rte_upper ? ++reg : reg;
+        *(IO_APIC_BASE(apic)+4) = value;
+        return;
+    }
+
+    *(((u32 *)&old_rte) + rte_upper) = value;
+    ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, &old_rte);
+
+    /* write new entry to ioapic */
+    *IO_APIC_BASE(apic) = reg;
+    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
+    *IO_APIC_BASE(apic) = reg + 1;
+    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+1);
+}
+
+/*
+ * Switch the IO-APIC RTE that serves the given GSI over to remap format.
+ *
+ * Does nothing when the GSI is not covered by any IO-APIC, when no remap
+ * table is available for that IO-APIC, or when the entry is already in
+ * remap format. Otherwise, under ioapic_lock, the entry is masked, read,
+ * converted with ioapic_rte_to_remap_entry(), written back and unmasked.
+ */
+void gsi_remapping(unsigned int gsi)
+{
+    struct IO_APIC_route_entry old_rte = { 0 };
+    struct IO_APIC_route_entry rte = { 0 };
+    struct IO_APIC_route_remap_entry remap_rte;
+    int apic;
+    unsigned int pin;
+    unsigned int reg;
+    unsigned long flags;
+    struct iommu *iommu;
+    struct ir_ctrl *ir_ctrl;
+
+    apic = gsi_to_ioapic(gsi);
+    /* -EINVAL must not be used as an index into mp_ioapics[] */
+    if ( apic < 0 )
+    {
+        dprintk(XENLOG_WARNING VTDPREFIX,
+                "gsi_remapping: no IO-APIC found for gsi %u\n", gsi);
+        return;
+    }
+    pin = gsi_to_ioapic_pin(gsi);
+    reg = 0x10 + 2 * pin;
+    iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
+    /* Only derive ir_ctrl from a valid iommu. */
+    ir_ctrl = iommu ? iommu_ir_ctrl(iommu) : NULL;
+
+    /* return if interrupt remapping is not enabled */
+    if ( !ir_ctrl || !(ir_ctrl->iremap) )
+        return;
+
+    spin_lock_irqsave(&ioapic_lock, flags);
+
+    /* check to see if it is already in intr remap format */
+    *IO_APIC_BASE(apic) = reg + 1;
+    *(((u32 *)&remap_rte) + 1) = *(IO_APIC_BASE(apic)+4);
+    if ( remap_rte.format ) {
+        spin_unlock_irqrestore(&ioapic_lock, flags);
+        return;
+    }
+
+    /* mask the interrupt */
+    *IO_APIC_BASE(apic) = reg;
+    *(((u32 *)&rte) + 0) = *(IO_APIC_BASE(apic)+4);
+    while (rte.mask == 0) {
+        rte.mask = 1;
+        *(IO_APIC_BASE(apic)+4) = *(((int *)&rte)+0);
+        rte.mask = 0;
+        *(((u32 *)&rte) + 0) = *(IO_APIC_BASE(apic)+4);
+    }
+
+    /* read entry from ioapic */
+    *IO_APIC_BASE(apic) = reg;
+    *(((u32 *)&old_rte) + 0) = *(IO_APIC_BASE(apic)+4);
+    *IO_APIC_BASE(apic) = reg + 1;
+    *(((u32 *)&old_rte) + 1) = *(IO_APIC_BASE(apic)+4);
+
+    /* construct remap entry and new format rte */
+    ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, &old_rte);
+
+    /* make sure no outstanding irr and pending interrupt */
+    while (rte.irr || rte.delivery_status ) {
+        *IO_APIC_BASE(apic) = reg;
+        *(((u32 *)&rte) + 0) = *(IO_APIC_BASE(apic)+4);
+    }
+
+    if (old_rte.mask == 0)
+        old_rte.mask = 1;
+
+    /* write entry to ioapic */
+    *IO_APIC_BASE(apic) = reg;
+    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
+    *IO_APIC_BASE(apic) = reg + 1;
+    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+1);
+
+    /* if still masked, unmask it */
+    *IO_APIC_BASE(apic) = reg;
+    *(((u32 *)&rte) + 0) = *(IO_APIC_BASE(apic)+4);
+    if (rte.mask == 1) {
+        rte.mask = 0;
+        *(IO_APIC_BASE(apic)+4) = *(((int *)&rte)+0);
+    }
+
+    *IO_APIC_BASE(apic) = reg;
+    *(((u32 *)&rte) + 0) = *(IO_APIC_BASE(apic)+4);
+    spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * Set up and enable interrupt remapping on one iommu.
+ *
+ * Allocates (once) and zeroes a page for the remap table, programs its
+ * address and size into DMAR_IRTA_REG, then sets the SIRTP, CFI and IRE
+ * command bits in turn, polling DMAR_GSTS_REG for each matching status
+ * bit, and finally flushes the interrupt entry cache globally.
+ *
+ * Returns 0 on success, or -ENODEV when the extended capability register
+ * does not advertise interrupt remapping or the table page cannot be
+ * allocated. Panics if a status bit is not observed within
+ * DMAR_OPERATION_TIMEOUT.
+ */
+int intremap_setup(struct iommu *iommu)
+{
+    struct ir_ctrl *ir_ctrl;
+    unsigned long start_time;
+    u64 paddr;
+    u32 status = 0;
+
+    if ( !ecap_intr_remap(iommu->ecap) )
+        return -ENODEV;
+
+    ir_ctrl = iommu_ir_ctrl(iommu);
+    if (ir_ctrl->iremap == NULL) {
+        ir_ctrl->iremap = alloc_xenheap_page();
+        if (ir_ctrl->iremap == NULL) {
+            dprintk(XENLOG_WARNING VTDPREFIX,
+                    "Cannot allocate memory for ir_ctrl->iremap\n");
+            return -ENODEV;
+        }
+        memset((u8*)ir_ctrl->iremap, 0, PAGE_SIZE_4K);
+    }
+
+    paddr = virt_to_maddr(ir_ctrl->iremap);
+#if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
+    /* set extended interrupt mode bit */
+    paddr |= ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT):0;
+#endif
+    /* size field = 256 entries per 4K page = 8 - 1 */
+    paddr |= 7;
+    dmar_writeq(iommu->reg, DMAR_IRTA_REG, paddr);
+
+    /* set SIRTP */
+    iommu->gcmd |= DMA_GCMD_SIRTP;
+    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+    /* Make sure hardware complete it */
+    start_time = jiffies;
+    while (1) {
+        status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+        if (status & DMA_GSTS_SIRTPS)
+            break;
+        if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+            panic("Cannot set SIRTP field for interrupt remapping\n");
+        cpu_relax();
+    }
+
+    /* enable compatibility format interrupt pass through */
+    iommu->gcmd |= DMA_GCMD_CFI;
+    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+    start_time = jiffies;
+    while (1) {
+        status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+        if (status & DMA_GSTS_CFIS)
+            break;
+        if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+            panic("Cannot set CFI field for interrupt remapping\n");
+        cpu_relax();
+    }
+
+    /* enable interrupt remapping hardware */
+    iommu->gcmd |= DMA_GCMD_IRE;
+    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+    start_time = jiffies;
+    while (1) {
+        status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+        if (status & DMA_GSTS_IRES)
+            break;
+        if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+            panic("Cannot set IRE field for interrupt remapping\n");
+        cpu_relax();
+    }
+
+    /* After setting SIRTP, globally invalidate the IEC */
+    iommu_flush_iec_global(iommu);
+
+    return 0;
+}