On Sat, Oct 16, 2010 at 10:26 AM, Sander Eikelenboom
<linux@xxxxxxxxxxxxxx> wrote:
>
> Probably there are more problems, you could also try a xen-unstable from
> before the commit that changed this code (msi.c)
> Another thing that could make it easier to debug would be to put some
> printk's around the WARN_ON's in msi.c at the line numbers that gave the
> warnings, showing both parts of the equation in the WARN_ON
>
Good idea.
Here's the debug stuff I added (so the printk output will make sense):
diff -r 3a5755249361 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c Thu Oct 14 12:46:29 2010 +0100
+++ b/xen/arch/x86/msi.c Sun Oct 17 13:18:06 2010 -0700
@@ -549,14 +549,14 @@
return 0;
if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
PCI_BASE_ADDRESS_MEM_TYPE_64 )
{
- addr &= ~PCI_BASE_ADDRESS_MEM_MASK;
+ addr &= PCI_BASE_ADDRESS_MEM_MASK;
if ( ++bir >= limit )
return 0;
return addr |
((u64)pci_conf_read32(bus, slot, func,
PCI_BASE_ADDRESS_0 + bir * 4) << 32);
}
- return addr & ~PCI_BASE_ADDRESS_MEM_MASK;
+ return addr & PCI_BASE_ADDRESS_MEM_MASK;
}
/**
@@ -633,7 +633,15 @@
u32 pba_offset;
ASSERT(!dev->msix_used_entries);
- WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, bir));
+ WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func,
bir)); // XXX
+ if(msi->table_base != read_pci_mem_bar(bus, slot, func, bir)); {
+ printk(
"==================================================\n");
+ printk( "msi->table_base !=
read_pci_mem_bar(bus, slot, func, bir)\n");
+ printk( "msi->table_base = %0lx\n", msi->table_base );
+ printk( "read_pci_mem_bar = %0lx\n",
read_pci_mem_bar(bus, slot, func, bir) );
+ printk( "bus=%0x, slot=%0x, func=%0x,
bir=%0x\n", bus, slot, func, bir);
+ printk(
"==================================================\n\n");
+ }
dev->msix_nr_entries = nr_entries;
dev->msix_table.first = PFN_DOWN(table_paddr);
@@ -646,14 +654,27 @@
msix_pba_offset_reg(pos));
bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
pba_paddr = read_pci_mem_bar(bus, slot, func, bir);
- WARN_ON(!pba_paddr);
+ WARN_ON(!pba_paddr); // XXX
+ if (!pba_paddr) {
+ printk(
"==================================================\n");
+ printk( "No pba_addr: bus=%0x, slot=%0x,
func=%0x, bir=%0x\n", bus, slot, func, bir);
+ printk(
"==================================================\n\n");
+ }
pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;
dev->msix_pba.first = PFN_DOWN(pba_paddr);
dev->msix_pba.last = PFN_DOWN(pba_paddr +
BITS_TO_LONGS(nr_entries) - 1);
WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, dev->msix_pba.first,
- dev->msix_pba.last));
+ dev->msix_pba.last)); // XXX
+ if ( ! rangeset_overlaps_range(mmio_ro_ranges, dev->msix_pba.first,
+ dev->msix_pba.last)) {
+ printk(
"==================================================\n");
+ printk( "rangeset_overlaps_range\n" );
+ printk( "mmio_ro_ranges = %p,
dev->msix_pba.first = %0lx, dev->msix_pba.last = %0lx\n",
+ mmio_ro_ranges,
dev->msix_pba.first, dev->msix_pba.last);
+ printk(
"==================================================\n\n");
+ }
if ( rangeset_add_range(mmio_ro_ranges, dev->msix_table.first,
dev->msix_table.last) )
The boot log from this patched msi.c is attached. Let me know what
else I can add to help track down this issue.
Also, here's the pci config of dom0, although I think it's the NICs
that are responsible for this:
00:00.0 Host bridge: Intel Corporation 5520/5500/X58 I/O Hub to ESI
Port (rev 12)
00:01.0 PCI bridge: Intel Corporation 5520/5500/X58 I/O Hub PCI
Express Root Port 1 (rev 12)
00:03.0 PCI bridge: Intel Corporation 5520/5500/X58 I/O Hub PCI
Express Root Port 3 (rev 12)
00:05.0 PCI bridge: Intel Corporation 5520/X58 I/O Hub PCI Express
Root Port 5 (rev 12)
00:07.0 PCI bridge: Intel Corporation 5520/5500/X58 I/O Hub PCI
Express Root Port 7 (rev 12)
00:09.0 PCI bridge: Intel Corporation 5520/5500/X58 I/O Hub PCI
Express Root Port 9 (rev 12)
00:14.0 PIC: Intel Corporation 5520/5500/X58 I/O Hub System Management
Registers (rev 12)
00:14.1 PIC: Intel Corporation 5520/5500/X58 I/O Hub GPIO and Scratch
Pad Registers (rev 12)
00:14.2 PIC: Intel Corporation 5520/5500/X58 I/O Hub Control Status
and RAS Registers (rev 12)
00:14.3 PIC: Intel Corporation 5520/5500/X58 I/O Hub Throttle Registers (rev 12)
00:16.0 System peripheral: Intel Corporation 5520/5500/X58 Chipset
QuickData Technology Device (rev 12)
00:16.1 System peripheral: Intel Corporation 5520/5500/X58 Chipset
QuickData Technology Device (rev 12)
00:16.2 System peripheral: Intel Corporation 5520/5500/X58 Chipset
QuickData Technology Device (rev 12)
00:16.3 System peripheral: Intel Corporation 5520/5500/X58 Chipset
QuickData Technology Device (rev 12)
00:16.4 System peripheral: Intel Corporation 5520/5500/X58 Chipset
QuickData Technology Device (rev 12)
00:16.5 System peripheral: Intel Corporation 5520/5500/X58 Chipset
QuickData Technology Device (rev 12)
00:16.6 System peripheral: Intel Corporation 5520/5500/X58 Chipset
QuickData Technology Device (rev 12)
00:16.7 System peripheral: Intel Corporation 5520/5500/X58 Chipset
QuickData Technology Device (rev 12)
00:1a.0 USB Controller: Intel Corporation 82801JI (ICH10 Family) USB
UHCI Controller #4
00:1a.1 USB Controller: Intel Corporation 82801JI (ICH10 Family) USB
UHCI Controller #5
00:1a.2 USB Controller: Intel Corporation 82801JI (ICH10 Family) USB
UHCI Controller #6
00:1a.7 USB Controller: Intel Corporation 82801JI (ICH10 Family) USB2
EHCI Controller #2
00:1c.0 PCI bridge: Intel Corporation 82801JI (ICH10 Family) PCI
Express Root Port 1
00:1c.1 PCI bridge: Intel Corporation 82801JI (ICH10 Family) PCI Express Port 2
00:1d.0 USB Controller: Intel Corporation 82801JI (ICH10 Family) USB
UHCI Controller #1
00:1d.1 USB Controller: Intel Corporation 82801JI (ICH10 Family) USB
UHCI Controller #2
00:1d.2 USB Controller: Intel Corporation 82801JI (ICH10 Family) USB
UHCI Controller #3
00:1d.7 USB Controller: Intel Corporation 82801JI (ICH10 Family) USB2
EHCI Controller #1
00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 90)
00:1f.0 ISA bridge: Intel Corporation 82801JIR (ICH10R) LPC Interface Controller
00:1f.2 RAID bus controller: Intel Corporation 82801 SATA RAID Controller
00:1f.3 SMBus: Intel Corporation 82801JI (ICH10 Family) SMBus Controller
01:00.0 Fibre Channel: PMC-Sierra Inc. Device 8032 (rev 05)
01:00.1 Fibre Channel: PMC-Sierra Inc. Device 8032 (rev 05)
04:00.0 Fibre Channel: PMC-Sierra Inc. Device 8032 (rev 05)
04:00.1 Fibre Channel: PMC-Sierra Inc. Device 8032 (rev 05)
05:00.0 SCSI storage controller: LSI Logic / Symbios Logic MegaRAID
SAS 8208ELP/8208ELP (rev 08)
06:00.0 Ethernet controller: Intel Corporation 82574L Gigabit Network Connection
07:00.0 Ethernet controller: Intel Corporation 82574L Gigabit Network Connection
08:04.0 VGA compatible controller: Matrox Graphics, Inc. MGA G200eW
WPCM450 (rev 0a)
ff:00.0 Host bridge: Intel Corporation Xeon 5500/Core i7 QuickPath
Architecture Generic Non-Core Registers (rev 04)
ff:00.1 Host bridge: Intel Corporation Xeon 5500/Core i7 QuickPath
Architecture System Address Decoder (rev 04)
ff:02.0 Host bridge: Intel Corporation Xeon 5500/Core i7 QPI Link 0 (rev 04)
ff:02.1 Host bridge: Intel Corporation Xeon 5500/Core i7 QPI Physical 0 (rev 04)
ff:03.0 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller (rev 04)
ff:03.1 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Target Address Decoder (rev 04)
ff:03.4 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Test Registers (rev 04)
ff:04.0 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 0 Control Registers (rev 04)
ff:04.1 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 0 Address Registers (rev 04)
ff:04.2 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 0 Rank Registers (rev 04)
ff:04.3 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 0 Thermal Control Registers (rev 04)
ff:05.0 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 1 Control Registers (rev 04)
ff:05.1 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 1 Address Registers (rev 04)
ff:05.2 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 1 Rank Registers (rev 04)
ff:05.3 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 1 Thermal Control Registers (rev 04)
ff:06.0 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 2 Control Registers (rev 04)
ff:06.1 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 2 Address Registers (rev 04)
ff:06.2 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 2 Rank Registers (rev 04)
ff:06.3 Host bridge: Intel Corporation Xeon 5500/Core i7 Integrated
Memory Controller Channel 2 Thermal Control Registers (rev 04)
Thanks
-Bruce
>
> --
>
> Sander
>
> Saturday, October 16, 2010, 7:14:11 PM, you wrote:
>
> > On Sat, Oct 16, 2010 at 9:29 AM, Sander Eikelenboom
> > <linux@xxxxxxxxxxxxxx> wrote:
> >> Hi Bruce,
> >>
> >> I tripped over the same warning trying to solve my freezes.
> >> Jan Beulich has posted a patch which is not in xen-unstable yet:
> >> [Xen-devel] [PATCH] x86/msi: fix inverted masks in c/s 22182:68cc3c514a0a
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
> >>
> >> --- a/xen/arch/x86/msi.c
> >> +++ b/xen/arch/x86/msi.c
> >> @@ -549,14 +549,14 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
> >> return 0;
> >> if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
> >> PCI_BASE_ADDRESS_MEM_TYPE_64 )
> >> {
> >> - addr &= ~PCI_BASE_ADDRESS_MEM_MASK;
> >> + addr &= PCI_BASE_ADDRESS_MEM_MASK;
> >> if ( ++bir >= limit )
> >> return 0;
> >> return addr |
> >> ((u64)pci_conf_read32(bus, slot, func,
> >> PCI_BASE_ADDRESS_0 + bir * 4) << 32);
> >> }
> >> - return addr & ~PCI_BASE_ADDRESS_MEM_MASK;
> >> + return addr & PCI_BASE_ADDRESS_MEM_MASK;
> >> }
> >>
> >> /**
> >>
> >>
> >>
> >> That fixes the warn, but my machine still keeps freezing nonetheless.
> >> (but it also does so with pci=nomsi so it's not msi specific in my case)
> >>
> >> --
> >>
> >> Sander
>
> > Hi Sander,
>
> > Thank you. I tried it against 4.1.0-22240 with no effect.
> > I confirmed I had the right patch:
>
> 0 %>> hg diff xen/arch/x86/msi.c
>
> > diff -r 38ad3633ecaf xen/arch/x86/msi.c
> > --- a/xen/arch/x86/msi.c Wed Oct 13 12:01:30 2010 +0100
> > +++ b/xen/arch/x86/msi.c Sat Oct 16 10:12:31 2010 -0700
> > @@ -549,14 +549,14 @@
> > return 0;
> > if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
> > PCI_BASE_ADDRESS_MEM_TYPE_64 )
> > {
> > - addr &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > + addr &= PCI_BASE_ADDRESS_MEM_MASK;
> > if ( ++bir >= limit )
> > return 0;
> > return addr |
> > ((u64)pci_conf_read32(bus, slot, func,
> > PCI_BASE_ADDRESS_0 + bir * 4) << 32);
> > }
> > - return addr & ~PCI_BASE_ADDRESS_MEM_MASK;
> > + return addr & PCI_BASE_ADDRESS_MEM_MASK;
> > }
>
> > /**
>
> > The boot time msi warn messages were unchanged.
>
> > -Bruce
>
> >>
> >> Saturday, October 16, 2010, 6:14:17 PM, you wrote:
> >>
> >>> On Mon, Oct 11, 2010 at 2:05 PM, Bruce Edge <bruce.edge@xxxxxxxxx> wrote:
> >>>> On Mon, Oct 11, 2010 at 10:12 AM, Gianni Tedesco
> >>>> <gianni.tedesco@xxxxxxxxxx> wrote:
> >>>>> On Fri, 2010-10-08 at 10:33 +0100, Gianni Tedesco wrote:
> >>>>>> Hi,
> >>>>>>
> >>>>>> I've been trying to boot stefano's minimal dom0 kernel from
> >>>>>> git://xenbits.xen.org/people/sstabellini/linux-pvhvm.git
> >>>>>> 2.6.36-rc1-initial-domain-v2+pat
> >>>>>>
> >>>>>> On xen-unstable, I get the following WARN_ON()'s from Xen when bringing
> >>>>>> up the NIC's, then the machine hangs forever when trying to login
> >>>>>> either
> >>>>>> over serial or NIC.
> >>>>>>
> >>>>>> (XEN) Xen WARN at msi.c:649
> >>>>
> >>>> I get the same Xen WARN messages using the current pvops/xen-next with
> >>>> xen-unstable, here's the complete list for one boot, grep'd for WARN:
> >>>>
> >>>> (XEN) Xen WARN at msi.c:636
> >>>> (XEN) Xen WARN at msi.c:649
> >>>> (XEN) Xen WARN at msi.c:636
> >>>> (XEN) Xen WARN at msi.c:649
> >>>> (XEN) Xen WARN at msi.c:656
> >>>> (XEN) Xen WARN at msi.c:636
> >>>> (XEN) Xen WARN at msi.c:649
> >>>> (XEN) Xen WARN at msi.c:636
> >>>> (XEN) Xen WARN at msi.c:649
> >>>> (XEN) Xen WARN at msi.c:656
> >>>> (XEN) Xen WARN at msi.c:636
> >>>> (XEN) Xen WARN at msi.c:649
> >>>> (XEN) Xen WARN at msi.c:656
> >>>> (XEN) Xen WARN at msi.c:636
> >>>> (XEN) Xen WARN at msi.c:649
> >>>> (XEN) 0000000080287db8 0(XEN) Xen WARN at msi.c:636
> >>>> (XEN) Xen WARN at msi.c:649
> >>>> (XEN) Xen WARN at msi.c:656
> >>>>
> >>>> The complete boot seq is attached.
> >>>>
> >>>> I do get a login at the end of the boot seq though.
> >>>> My situation goes pear-shaped when I try to start a pv domU. The dom0
> >>>> locks up after printing this on the console:
> >>>>
> >>>> (XEN) tmem: all pools frozen for all domains
> >>>> (XEN) tmem: all pools thawed for all domains
> >>>> (XEN) tmem: all pools frozen for all domains
> >>>> (XEN) tmem: all pools thawed for all domains
> >>>> mapping kernel into physical memory
> >>>> about to get started...
> >>>>
> >>>> then prints these once a minute:
> >>>> [ 589.490894] BUG: soft lockup - CPU#0 stuck for 61s! [swapper:0]
> >>>>
> >>>> The xen console is still active and I can generate a diag dump, also
> >>>> attached.
> >>>>
> >>>> This dom0 lockup behavior started with pv-ops 2.6.32.21, all the way
> >>>> to .24, rendering the later pvops kernels unusable for dom0.
> >>>> The 2.6.32.18 kernel is the last one that functioned as a dom0.
> >>>>
> >>>> This behavior is consistent on platforms, HP proliant 380DL G6, and
> >>>> G7, as well as i7 supermicros.
> >>>>
> >>>> -Bruce
> >>>>
> >>>>>
> >>>>> Hmm so this appears not to be an issue with XCP kernel, in that case I
> >>>>> get the warnings but everything still works fine.
> >>>>>
> >>>>> I will investigate further when I have some time.
> >>>>>
> >>>>> Gianni
> >>>>>
> >>>>>
> >>>>> _______________________________________________
> >>>>> Xen-devel mailing list
> >>>>> Xen-devel@xxxxxxxxxxxxxxxxxxx
> >>>>> http://lists.xensource.com/xen-devel
> >>>>>
> >>>>
> >>
> >>> The latest xen-unstable, 22240 has the same " (XEN) Xen WARN at
> >>> msi.c:636 " messages with associated stack traces.
> >>
> >>> I spent a little more time working with this version, and except for
> >>> these disconcerting messages, which do look like they are initiated by
> >>> the ethernet card discovery, the system appears functional.
> >>> In all cases the first occurrence is immediately after the NIC discovery:
> >>
> >>> e1000e: Intel(R) PRO/1000 Network Driver - 1.0.2-k2
> >>> | e1000e: Copyright (c) 1999-2008 Intel Corporation.
> >>> | xen: registering gsi 16 triggering 0 polarity 1
> >>> | xen_allocate_pirq: returning irq 16 for gsi 16
> >>> xen: --> irq=16
> >>> Already setup the GSI :16
> >>> e1000e 0000:06:00.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
> >>> e1000e 0000:06:00.0: setting latency timer to 64
> >>> alloc irq_desc for 493 on node 0
> >>> alloc kstat_irqs on node 0
> >>> (XEN) Xen WARN at msi.c:636
> >>> (XEN) ----[ Xen-4.1-unstable x86_64 debug=y Not tainted ]----
> >>> ....
> >>
> >>> In case it's a NIC specific issue, I'm seeing it with both
> >>> 06:00.0 Ethernet controller: Intel Corporation 82574L Gigabit
> >>> Network Connection
> >>> and
> >>> 02:00.0 Ethernet controller: Broadcom Corporation NetXtreme II
> >>> BCM5709 Gigabit Ethernet (rev 20)
> >>> NICs
> >>
> >>> -Bruce
> >>
> >>
> >>
> >>
> >>
> >> --
> >> Best regards,
> >> Sander mailto:linux@xxxxxxxxxxxxxx
> >>
> >>
>
>
>
> --
> Best regards,
> Sander mailto:linux@xxxxxxxxxxxxxx
>
patched-xen-boot-warn.log
Description: Text Data
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|