WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH] x86/IRQ: prevent vector sharing within IO-APICs

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>, Jan Beulich <JBeulich@xxxxxxxx>
Subject: Re: [Xen-devel] [PATCH] x86/IRQ: prevent vector sharing within IO-APICs
From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Date: Fri, 11 Nov 2011 17:13:03 +0000
Cc:
Delivery-date: Fri, 11 Nov 2011 09:15:39 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <4EBD54B80200007800060776@xxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <4EBD54B80200007800060776@xxxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.23) Gecko/20110921 Lightning/1.0b2 Thunderbird/3.1.15
On 11/11/11 16:00, Jan Beulich wrote:
Following the prevention of vector sharing for MSIs, this change
enforces the same within IO-APICs: Pin based interrupts use the IO-APIC
as their identifying device under the AMD IOMMU (and just like for
MSIs, only the identifying device is used to remap interrupts here,
with no regard to an interrupt's destination).

Additionally, LAPIC initiated EOIs (for level triggered interrupts) too
use only the vector for identifying which interrupts to end. While this
generally causes no significant problem (at worst an interrupt would be
re-raised without a new interrupt event actually having occurred)

At worst, hardware asserts a line interrupt, deasserts it later, and an EOI broadcast gets rid of any record that the IRQ was ever raised.  While I would classify this as buggy behavior, I believe I have seen some hardware doing this when investigating the line level IRQ migration bug, as clearing the IRR did not immediately cause another interrupt to be generated.

, it
still seems better to avoid the situation.

For this second aspect, a distinction is being made between the
traditional and the directed-EOI cases: In the former, vectors should
not be shared throughout all IO-APICs in the system, while in the
latter case only individual IO-APICs need to be contrained (or, if the
firmware indicates so, sub- groups of them having the same GSI appear
at multiple pins).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Provisional nack because it is my understanding that under all circumstances, you must maintain a vector exclusivity map across all IO-APICs because of the broadcast problem.  Or have I made a mistake in my reasoning?

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -395,6 +395,11 @@ static vmask_t *irq_get_used_vector_mask
             }
         }
     }
+    else if ( IO_APIC_IRQ(irq) &&
+              opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_NONE )
+    {
+        ret = io_apic_get_used_vector_map(irq);
+    }
 
     return ret;
 }
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -72,6 +72,33 @@ int __read_mostly nr_ioapics;
 #define io_apic_eoi_vector(apic, vector) io_apic_eoi((apic), (vector), -1)
 #define io_apic_eoi_pin(apic, pin) io_apic_eoi((apic), -1, (pin))
 
+static int apic_pin_2_gsi_irq(int apic, int pin);
+
+static vmask_t *__read_mostly vector_map[MAX_IO_APICS];
+
+static void share_vector_maps(unsigned int src, unsigned int dst)
+{
+    unsigned int pin;
+
+    if (vector_map[src] == vector_map[dst])
+        return;
+
+    bitmap_or(vector_map[src]->_bits, vector_map[src]->_bits,
+              vector_map[dst]->_bits, NR_VECTORS);
+
+    for (pin = 0; pin < nr_ioapic_entries[dst]; ++pin) {
+        int irq = apic_pin_2_gsi_irq(dst, pin);
+        struct irq_desc *desc;
+
+        if (irq < 0)
+            continue;
+        desc = irq_to_desc(irq);
+        if (desc->arch.used_vectors == vector_map[dst])
+            desc->arch.used_vectors = vector_map[src];
+    }
+
+    vector_map[dst] = vector_map[src];
+}
 
 /*
  * This is performance-critical, we want to do it O(1)
@@ -113,6 +140,7 @@ static void add_pin_to_irq(unsigned int 
     }
     entry->apic = apic;
     entry->pin = pin;
+    share_vector_maps(irq_2_pin[irq].apic, apic);
 }
 
 /*
@@ -128,6 +156,7 @@ static void __init replace_pin_at_irq(un
         if (entry->apic == oldapic && entry->pin == oldpin) {
             entry->apic = newapic;
             entry->pin = newpin;
+            share_vector_maps(oldapic, newapic);
         }
         if (!entry->next)
             break;
@@ -135,6 +164,16 @@ static void __init replace_pin_at_irq(un
     }
 }
 
+vmask_t *io_apic_get_used_vector_map(unsigned int irq)
+{
+    struct irq_pin_list *entry = irq_2_pin + irq;
+
+    if (entry->pin == -1)
+        return NULL;
+
+    return vector_map[entry->apic];
+}
+
 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
 {
     int apic;
@@ -1244,6 +1283,18 @@ static void __init enable_IO_APIC(void)
     for (i = irq_2_pin_free_entry = nr_irqs_gsi; i < PIN_MAP_SIZE; i++)
         irq_2_pin[i].next = i + 1;
 
+    if (directed_eoi_enabled) {
+        for (apic = 0; apic < nr_ioapics; apic++) {
+            vector_map[apic] = xzalloc(vmask_t);
+            BUG_ON(!vector_map[apic]);
+        }
+    } else {
+        vector_map[0] = xzalloc(vmask_t);
+        BUG_ON(!vector_map[0]);
+        for (apic = 1; apic < nr_ioapics; apic++)
+            vector_map[apic] = vector_map[0];
+    }
+
     for(apic = 0; apic < nr_ioapics; apic++) {
         int pin;
         /* See if any of the pins is in ExtINT mode */
@@ -2345,13 +2396,12 @@ int ioapic_guest_write(unsigned long phy
     }
 
     if ( desc->arch.vector <= 0 || desc->arch.vector > LAST_DYNAMIC_VECTOR ) {
+        add_pin_to_irq(irq, apic, pin);
         vector = assign_irq_vector(irq);
         if ( vector < 0 )
             return vector;
 
         printk(XENLOG_INFO "allocated vector %02x for irq %d\n", vector, irq);
-
-        add_pin_to_irq(irq, apic, pin);
     }
     spin_lock(&dom0->event_lock);
     ret = map_domain_pirq(dom0, pirq, irq,
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -116,6 +116,7 @@ int i8259A_resume(void);
 void setup_IO_APIC(void);
 void disable_IO_APIC(void);
 void setup_ioapic_dest(void);
+vmask_t *io_apic_get_used_vector_map(unsigned int irq);
 
 extern unsigned int io_apic_irqs;
 



-- 
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel