As pointed out in http://lists.xensource.com/archives/html/xen-devel/2010-07/msg00077.html the limits introduced in c/s 20072 are at least questionable. Eliminate them in favor of a more dynamic approach: There's no real need for an upper limit on nr_irqs (as anything beyond nr_irqs_gsi isn't visible to domains anyway), and the split point (and hence ratio) between GSI and MSI/MSI-X IRQs doesn't need to be hard coded, but can instead be controlled on the command line in case there are *very* many GSIs. The default used for nr_irqs will be rather large with this patch, so it may not be acceptable without also switching to a sparse irq_desc[] as was done not so long ago in Linux. The added capping of any domain's nr_pirqs is based on the observation that no domain can possibly have more than the system-wide number of IRQs. The opposite case may in fact also require some adjustment: Defaulting the number of non-GSI IRQs available (namely to Dom0) to a fixed value may not be the best choice going forward, since if there indeed are very many non-GSI interrupt sources, it won't be possible for the kernel to make use of them without giving "extra_guest_irqs=" on the command line (but the goal should be to allow things to work right by default even on large systems). Signed-off-by: Jan Beulich --- 2010-08-12.orig/xen/arch/x86/io_apic.c 2010-08-06 08:44:33.000000000 +0200 +++ 2010-08-12/xen/arch/x86/io_apic.c 2010-08-12 17:01:37.000000000 +0200 @@ -2503,6 +2503,9 @@ void dump_ioapic_irq_info(void) unsigned highest_gsi(void); +static unsigned int __initdata max_gsi_irqs; +integer_param("max_gsi_irqs", max_gsi_irqs); + void __init init_ioapic_mappings(void) { unsigned long ioapic_phys; @@ -2547,19 +2550,37 @@ void __init init_ioapic_mappings(void) nr_irqs_gsi = max(nr_irqs_gsi, highest_gsi()); + if ( max_gsi_irqs == 0 ) + max_gsi_irqs = nr_irqs ? 
nr_irqs / 8 : PAGE_SIZE; + else if ( nr_irqs != 0 && max_gsi_irqs > nr_irqs ) + { + printk(XENLOG_WARNING "\"max_gsi_irqs=\" cannot be specified larger" + " than \"nr_irqs=\"\n"); + max_gsi_irqs = nr_irqs; + } + if ( max_gsi_irqs < 16 ) + max_gsi_irqs = 16; + + /* for PHYSDEVOP_pirq_eoi_gmfn guest assumptions */ + if ( max_gsi_irqs > PAGE_SIZE * 8 ) + max_gsi_irqs = PAGE_SIZE * 8; + if ( !smp_found_config || skip_ioapic_setup || nr_irqs_gsi < 16 ) nr_irqs_gsi = 16; - else if ( nr_irqs_gsi > MAX_GSI_IRQS) + else if ( nr_irqs_gsi > max_gsi_irqs ) { - /* for PHYSDEVOP_pirq_eoi_gmfn guest assumptions */ - printk(KERN_WARNING "Limiting number of GSI IRQs found (%u) to %lu\n", - nr_irqs_gsi, MAX_GSI_IRQS); - nr_irqs_gsi = MAX_GSI_IRQS; + printk(XENLOG_WARNING "Limiting to %u GSI IRQs (found %u)\n", + max_gsi_irqs, nr_irqs_gsi); + nr_irqs_gsi = max_gsi_irqs; } - if (nr_irqs < 2 * nr_irqs_gsi) - nr_irqs = 2 * nr_irqs_gsi; - - if (nr_irqs > MAX_NR_IRQS) - nr_irqs = MAX_NR_IRQS; + if ( nr_irqs == 0 ) + nr_irqs = cpu_has_apic ? 
+ max(16U + num_present_cpus() * NR_DYNAMIC_VECTORS, + 8 * nr_irqs_gsi) : + nr_irqs_gsi; + else if ( nr_irqs < 16 ) + nr_irqs = 16; + printk(XENLOG_INFO "IRQ limits: %u GSI, %u MSI/MSI-X\n", + nr_irqs_gsi, nr_irqs - nr_irqs_gsi); } --- 2010-08-12.orig/xen/arch/x86/irq.c 2010-07-05 08:49:19.000000000 +0200 +++ 2010-08-12/xen/arch/x86/irq.c 2010-08-12 17:01:37.000000000 +0200 @@ -29,7 +29,7 @@ int __read_mostly opt_noirqbalance = 0; boolean_param("noirqbalance", opt_noirqbalance); unsigned int __read_mostly nr_irqs_gsi = 16; -unsigned int __read_mostly nr_irqs = 1024; +unsigned int __read_mostly nr_irqs; integer_param("nr_irqs", nr_irqs); u8 __read_mostly *irq_vector; --- 2010-08-12.orig/xen/common/domain.c 2010-08-06 08:44:34.000000000 +0200 +++ 2010-08-12/xen/common/domain.c 2010-08-12 17:01:37.000000000 +0200 @@ -274,6 +274,8 @@ struct domain *domain_create( d->nr_pirqs = nr_irqs_gsi + extra_domU_irqs; else d->nr_pirqs = nr_irqs_gsi + extra_dom0_irqs; + if ( d->nr_pirqs > nr_irqs ) + d->nr_pirqs = nr_irqs; d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs); d->pirq_mask = xmalloc_array( --- 2010-08-12.orig/xen/include/asm-x86/irq.h 2010-07-05 13:42:16.000000000 +0200 +++ 2010-08-12/xen/include/asm-x86/irq.h 2010-08-12 17:01:37.000000000 +0200 @@ -23,9 +23,6 @@ #define irq_to_desc(irq) (&irq_desc[irq]) #define irq_cfg(irq) (&irq_cfg[irq]) -#define MAX_GSI_IRQS PAGE_SIZE * 8 -#define MAX_NR_IRQS (2 * MAX_GSI_IRQS) - struct irq_cfg { int vector; cpumask_t domain;