Well, this *is* the correct fix to the original problem. It's likely,
then, that there's either a bug in a BIOS somewhere, or a bug in the
per-device intremap table code that was previously hidden by global
intremap being the default.
However, from a practical perspective: if Wei doesn't have time to
work on it, I may not have any for several weeks either; in the
meantime, I think reverting this patch to un-break testing for
everyone else makes sense.
This bug is likely to affect our upcoming XenServer release as well,
so let me see if I can prioritize working on it.
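
For anyone who hasn't been following the thread: the core of what
23733 changed (and what this revert undoes) is simply which interrupt
remapping table a device's IVRS entry points at -- one shared table
for everything, or one table per device.  Below is a minimal,
stand-alone model of that selection.  This is my own paraphrase of
the add_ivrs_mapping_entry() / get_intremap_lock() hunks quoted
further down, not the actual Xen code; names like table_for() and
NUM_DEVICES are made up for illustration.

/* Illustrative model only -- not the Xen implementation. */
#include <stdio.h>
#include <stdlib.h>

#define NUM_DEVICES 4

static void *shared_table;               /* one table shared by all devices */
static void *perdev_table[NUM_DEVICES];  /* or one table per device */
static int amd_iommu_perdev_intremap;    /* 0 = shared (the pre-23733 default) */

static void *alloc_intremap_table(void)
{
    return calloc(1, 4096);              /* stands in for the real allocator */
}

static void *table_for(int bdf)
{
    if ( amd_iommu_perdev_intremap )
    {
        /* Per-device path: allocate a table the first time we see this bdf. */
        if ( perdev_table[bdf] == NULL )
            perdev_table[bdf] = alloc_intremap_table();
        return perdev_table[bdf];
    }

    /* Shared path: allocate the single table lazily, hand it to everyone. */
    if ( shared_table == NULL )
        shared_table = alloc_intremap_table();
    return shared_table;
}

int main(void)
{
    /* With the revert applied, the shared path is the default again. */
    printf("dev0 %p dev1 %p (shared: same pointer)\n",
           table_for(0), table_for(1));

    amd_iommu_perdev_intremap = 1;        /* what c/s 23733 made the default */
    printf("dev2 %p dev3 %p (per-device: distinct pointers)\n",
           table_for(2), table_for(3));
    return 0;
}

If we do revert, per-device tables can presumably still be requested
explicitly via the re-added "amd-iommu-perdev-intremap" sub-option of
the iommu= command-line parameter (per the parse_iommu_param() hunk
below), which should make it easier to keep exercising the per-device
path on hardware where it works.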
-George
On Tue, Aug 16, 2011 at 12:09 PM, Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx> wrote:
> 23733:fbf3768e5934 causes xen-unstable not to boot on several of the
> xen.org AMD test systems. We get an endless series of these:
>
> (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0x00a0, fault address = 0xfdf8f10144
>
> I have constructed the attached patch which reverts c/s 23733
> (adjusted for conflicts due to subsequent patches). With this
> reversion Xen once more boots on these machines.
>
> 23733 has been in the tree for some time now, causing this breakage,
> and has already been fingered by the automatic bisector and discussed
> on xen-devel as the cause of boot failures. I think it is now time to
> revert it pending a correct fix to the original problem.
>
> Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
>
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_acpi.c
> --- a/xen/drivers/passthrough/amd/iommu_acpi.c Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c Tue Aug 16 11:57:01 2011 +0100
> @@ -66,8 +66,15 @@ static void __init add_ivrs_mapping_entr
> if (ivrs_mappings[alias_id].intremap_table == NULL )
> {
> /* allocate per-device interrupt remapping table */
> - ivrs_mappings[alias_id].intremap_table =
> + if ( amd_iommu_perdev_intremap )
> + ivrs_mappings[alias_id].intremap_table =
> amd_iommu_alloc_intremap_table();
> + else
> + {
> + if ( shared_intremap_table == NULL )
> + shared_intremap_table = amd_iommu_alloc_intremap_table();
> + ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
> + }
> }
> /* assgin iommu hardware */
> ivrs_mappings[bdf].iommu = iommu;
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_init.c
> --- a/xen/drivers/passthrough/amd/iommu_init.c Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Aug 16 11:57:01 2011 +0100
> @@ -500,7 +500,7 @@ static void parse_event_log_entry(u32 en
> /* Tell the device to stop DMAing; we can't rely on the guest to
> * control it for us. */
> for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
> - if ( get_requestor_id(bdf) == device_id )
> + if ( get_dma_requestor_id(bdf) == device_id )
> {
> cword = pci_conf_read16(PCI_BUS(bdf), PCI_SLOT(bdf),
> PCI_FUNC(bdf), PCI_COMMAND);
> @@ -772,7 +772,8 @@ static int __init init_ivrs_mapping(void
> ivrs_mappings[bdf].dte_ext_int_pass = IOMMU_CONTROL_DISABLED;
> ivrs_mappings[bdf].dte_init_pass = IOMMU_CONTROL_DISABLED;
>
> - spin_lock_init(&ivrs_mappings[bdf].intremap_lock);
> + if ( amd_iommu_perdev_intremap )
> + spin_lock_init(&ivrs_mappings[bdf].intremap_lock);
> }
> return 0;
> }
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_intr.c
> --- a/xen/drivers/passthrough/amd/iommu_intr.c Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c Tue Aug 16 11:57:01 2011 +0100
> @@ -28,10 +28,20 @@
> #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
>
> int ioapic_bdf[MAX_IO_APICS];
> +void *shared_intremap_table;
> +static DEFINE_SPINLOCK(shared_intremap_lock);
>
> static spinlock_t* get_intremap_lock(int req_id)
> {
> - return &ivrs_mappings[req_id].intremap_lock;
> + return (amd_iommu_perdev_intremap ?
> + &ivrs_mappings[req_id].intremap_lock:
> + &shared_intremap_lock);
> +}
> +
> +static int get_intremap_requestor_id(int bdf)
> +{
> + ASSERT( bdf < ivrs_bdf_entries );
> + return ivrs_mappings[bdf].dte_requestor_id;
> }
>
> static int get_intremap_offset(u8 vector, u8 dm)
> @@ -115,7 +125,7 @@ static void update_intremap_entry_from_i
> spinlock_t *lock;
> int offset;
>
> - req_id = get_requestor_id(bdf);
> + req_id = get_intremap_requestor_id(bdf);
> lock = get_intremap_lock(req_id);
>
> delivery_mode = rte->delivery_mode;
> @@ -173,7 +183,7 @@ int __init amd_iommu_setup_ioapic_remapp
> continue;
> }
>
> - req_id = get_requestor_id(bdf);
> + req_id = get_intremap_requestor_id(bdf);
> lock = get_intremap_lock(req_id);
>
> delivery_mode = rte.delivery_mode;
> @@ -273,13 +283,14 @@ static void update_intremap_entry_from_m
> {
> unsigned long flags;
> u32* entry;
> - u16 bdf, req_id;
> + u16 bdf, req_id, alias_id;
> u8 delivery_mode, dest, vector, dest_mode;
> spinlock_t *lock;
> int offset;
>
> bdf = (pdev->bus << 8) | pdev->devfn;
> - req_id = get_requestor_id(bdf);
> + req_id = get_dma_requestor_id(bdf);
> + alias_id = get_intremap_requestor_id(bdf);
>
> if ( msg == NULL )
> {
> @@ -288,6 +299,14 @@ static void update_intremap_entry_from_m
> free_intremap_entry(req_id, msi_desc->remap_index);
> spin_unlock_irqrestore(lock, flags);
>
> + if ( ( req_id != alias_id ) &&
> + ivrs_mappings[alias_id].intremap_table != NULL )
> + {
> + lock = get_intremap_lock(alias_id);
> + spin_lock_irqsave(lock, flags);
> + free_intremap_entry(alias_id, msi_desc->remap_index);
> + spin_unlock_irqrestore(lock, flags);
> + }
> goto done;
> }
>
> @@ -305,11 +324,30 @@ static void update_intremap_entry_from_m
> update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
> spin_unlock_irqrestore(lock, flags);
>
> + /*
> + * In some special cases, a pci-e device(e.g SATA controller in IDE mode)
> + * will use alias id to index interrupt remapping table.
> + * We have to setup a secondary interrupt remapping entry to satisfy those
> + * devices.
> + */
> +
> + lock = get_intremap_lock(alias_id);
> + if ( ( req_id != alias_id ) &&
> + ivrs_mappings[alias_id].intremap_table != NULL )
> + {
> + spin_lock_irqsave(lock, flags);
> + entry = (u32*)get_intremap_entry(alias_id, offset);
> + update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
> + spin_unlock_irqrestore(lock, flags);
> + }
> +
> done:
> if ( iommu->enabled )
> {
> spin_lock_irqsave(&iommu->lock, flags);
> invalidate_interrupt_table(iommu, req_id);
> + if ( alias_id != req_id )
> + invalidate_interrupt_table(iommu, alias_id);
> flush_command_buffer(iommu);
> spin_unlock_irqrestore(&iommu->lock, flags);
> }
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_map.c
> --- a/xen/drivers/passthrough/amd/iommu_map.c Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_map.c Tue Aug 16 11:57:01 2011 +0100
> @@ -543,7 +543,7 @@ static int update_paging_mode(struct dom
> for_each_pdev( d, pdev )
> {
> bdf = (pdev->bus << 8) | pdev->devfn;
> - req_id = get_requestor_id(bdf);
> + req_id = get_dma_requestor_id(bdf);
> iommu = find_iommu_for_device(bdf);
> if ( !iommu )
> {
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/pci_amd_iommu.c
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 16 11:57:01 2011 +0100
> @@ -34,10 +34,25 @@ struct amd_iommu *find_iommu_for_device(
> return ivrs_mappings[bdf].iommu;
> }
>
> -int get_requestor_id(u16 bdf)
> +/*
> + * Some devices will use alias id and original device id to index interrupt
> + * table and I/O page table respectively. Such devices will have
> + * both alias entry and select entry in IVRS structure.
> + *
> + * Return original device id, if device has valid interrupt remapping
> + * table setup for both select entry and alias entry.
> + */
> +int get_dma_requestor_id(u16 bdf)
> {
> + int req_id;
> +
> BUG_ON ( bdf >= ivrs_bdf_entries );
> - return ivrs_mappings[bdf].dte_requestor_id;
> + req_id = ivrs_mappings[bdf].dte_requestor_id;
> + if ( (ivrs_mappings[bdf].intremap_table != NULL) &&
> + (ivrs_mappings[req_id].intremap_table != NULL) )
> + req_id = bdf;
> +
> + return req_id;
> }
>
> static int is_translation_valid(u32 *entry)
> @@ -79,7 +94,7 @@ static void amd_iommu_setup_domain_devic
> valid = 0;
>
> /* get device-table entry */
> - req_id = get_requestor_id(bdf);
> + req_id = get_dma_requestor_id(bdf);
> dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
>
> spin_lock_irqsave(&iommu->lock, flags);
> @@ -252,7 +267,7 @@ static void amd_iommu_disable_domain_dev
> int req_id;
>
> BUG_ON ( iommu->dev_table.buffer == NULL );
> - req_id = get_requestor_id(bdf);
> + req_id = get_dma_requestor_id(bdf);
> dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
>
> spin_lock_irqsave(&iommu->lock, flags);
> @@ -314,7 +329,7 @@ static int amd_iommu_assign_device(struc
> static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
> {
> int bdf = (bus << 8) | devfn;
> - int req_id = get_requestor_id(bdf);
> + int req_id = get_dma_requestor_id(bdf);
>
> if ( ivrs_mappings[req_id].unity_map_enable )
> {
> @@ -433,7 +448,7 @@ static int amd_iommu_group_id(u8 bus, u8
> int rt;
> int bdf = (bus << 8) | devfn;
> rt = ( bdf < ivrs_bdf_entries ) ?
> - get_requestor_id(bdf) :
> + get_dma_requestor_id(bdf) :
> bdf;
> return rt;
> }
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/iommu.c
> --- a/xen/drivers/passthrough/iommu.c Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/iommu.c Tue Aug 16 11:57:01 2011 +0100
> @@ -50,6 +50,7 @@ bool_t __read_mostly iommu_hap_pt_share;
> bool_t __read_mostly iommu_hap_pt_share;
> bool_t __read_mostly iommu_debug;
> bool_t __read_mostly iommu_amd_perdev_vector_map = 1;
> +bool_t __read_mostly amd_iommu_perdev_intremap;
>
> static void __init parse_iommu_param(char *s)
> {
> @@ -76,6 +77,8 @@ static void __init parse_iommu_param(cha
> iommu_intremap = 0;
> else if ( !strcmp(s, "debug") )
> iommu_debug = 1;
> + else if ( !strcmp(s, "amd-iommu-perdev-intremap") )
> + amd_iommu_perdev_intremap = 1;
> else if ( !strcmp(s, "dom0-passthrough") )
> iommu_passthrough = 1;
> else if ( !strcmp(s, "dom0-strict") )
> diff -r 8d6edc3d26d2 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Tue Aug 16 11:57:01 2011 +0100
> @@ -65,7 +65,7 @@ void amd_iommu_share_p2m(struct domain *
> void amd_iommu_share_p2m(struct domain *d);
>
> /* device table functions */
> -int get_requestor_id(u16 bdf);
> +int get_dma_requestor_id(u16 bdf);
> void amd_iommu_add_dev_table_entry(
> u32 *dte, u8 sys_mgt, u8 dev_ex, u8 lint1_pass, u8 lint0_pass,
> u8 nmi_pass, u8 ext_int_pass, u8 init_pass);
> @@ -97,6 +97,7 @@ unsigned int amd_iommu_read_ioapic_from_
> unsigned int apic, unsigned int reg);
>
> extern int ioapic_bdf[MAX_IO_APICS];
> +extern void *shared_intremap_table;
>
> /* power management support */
> void amd_iommu_resume(void);
> diff -r 8d6edc3d26d2 xen/include/xen/iommu.h
> --- a/xen/include/xen/iommu.h Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/include/xen/iommu.h Tue Aug 16 11:57:01 2011 +0100
> @@ -32,6 +32,7 @@ extern bool_t iommu_snoop, iommu_qinval,
> extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
> extern bool_t iommu_hap_pt_share;
> extern bool_t iommu_debug;
> +extern bool_t amd_iommu_perdev_intremap;
>
> extern struct rangeset *mmio_ro_ranges;
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel