# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1256564018 0
# Node ID 42e9682e4b6143462d8ebb8ba5bdd46144f90faf
# Parent f0be14bb3f7a4d9e9d865b3a4c1433a25051ee87
x86: IRQ Migration logic enhancement.
To program an MSI's address/vector safely, defer the IRQ migration
operation until the next interrupt is being acked. This should
avoid inconsistent interrupt generation caused by non-atomic writes
to the MSI address and data registers.
Port the logic from Linux and tailor it for Xen.
Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
---
xen/arch/x86/hpet.c | 1
xen/arch/x86/hvm/hvm.c | 2 -
xen/arch/x86/io_apic.c | 42 ++++++++++++++++++++++++++++
xen/arch/x86/irq.c | 61 ++++++++++++++++++++++++++++++++++++++++++
xen/arch/x86/msi.c | 4 --
xen/include/asm-x86/io_apic.h | 9 ++++++
xen/include/asm-x86/irq.h | 6 ++++
xen/include/xen/irq.h | 2 +
8 files changed, 122 insertions(+), 5 deletions(-)
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/hpet.c
--- a/xen/arch/x86/hpet.c Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/hpet.c Mon Oct 26 13:33:38 2009 +0000
@@ -289,6 +289,7 @@ static void hpet_msi_ack(unsigned int ir
struct irq_desc *desc = irq_to_desc(irq);
irq_complete_move(&desc);
+ move_native_irq(irq);
ack_APIC_irq();
}
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/hvm/hvm.c Mon Oct 26 13:33:38 2009 +0000
@@ -243,7 +243,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
continue;
irq = desc - irq_desc;
ASSERT(MSI_IRQ(irq));
- desc->handler->set_affinity(irq, *cpumask_of(v->processor));
+ irq_set_affinity(irq, *cpumask_of(v->processor));
spin_unlock_irq(&desc->lock);
}
spin_unlock(&d->event_lock);
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/io_apic.c Mon Oct 26 13:33:38 2009 +0000
@@ -1379,6 +1379,7 @@ static void ack_edge_ioapic_irq(unsigned
struct irq_desc *desc = irq_to_desc(irq);
irq_complete_move(&desc);
+ move_native_irq(irq);
if ((desc->status & (IRQ_PENDING | IRQ_DISABLED))
== (IRQ_PENDING | IRQ_DISABLED))
@@ -1418,6 +1419,38 @@ static void setup_ioapic_ack(char *s)
printk("Unknown ioapic_ack value specified: '%s'\n", s);
}
custom_param("ioapic_ack", setup_ioapic_ack);
+
+/*
+ * Check whether any IO-APIC pin attached to @irq still has its Remote IRR
+ * bit set.
+ *
+ * Walks the irq_2_pin chain for @irq under ioapic_lock, reading register
+ * 0x10 + pin*2 of each pin's IO-APIC and testing IO_APIC_REDIR_REMOTE_IRR.
+ *
+ * Returns 1 as soon as one pin reports the bit set, 0 otherwise (including
+ * when no pin is attached to @irq).
+ */
+static bool_t io_apic_level_ack_pending(unsigned int irq)
+{
+    struct irq_pin_list *entry;
+    unsigned long flags;
+    bool_t pending = 0;
+
+    spin_lock_irqsave(&ioapic_lock, flags);
+    entry = &irq_2_pin[irq];
+    for (;;) {
+        int pin = entry->pin;
+
+        /*
+         * An entry with pin == -1 has nothing to read.  It must be skipped
+         * by falling through to the chain advance below: a bare 'continue'
+         * would re-examine the same entry forever with ioapic_lock held.
+         */
+        if (pin != -1) {
+            unsigned int reg = io_apic_read(entry->apic, 0x10 + pin*2);
+
+            /* Is the remote IRR bit set? */
+            if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+                pending = 1;
+                break;
+            }
+        }
+
+        if (!entry->next)
+            break;
+        entry = irq_2_pin + entry->next;
+    }
+    spin_unlock_irqrestore(&ioapic_lock, flags);
+
+    return pending;
+}
static void mask_and_ack_level_ioapic_irq (unsigned int irq)
{
@@ -1456,6 +1489,10 @@ static void mask_and_ack_level_ioapic_ir
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
ack_APIC_irq();
+
+ if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
+ !io_apic_level_ack_pending(irq))
+ move_native_irq(irq);
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
@@ -1503,6 +1540,10 @@ static void end_level_ioapic_irq (unsign
ack_APIC_irq();
+ if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
+ !io_apic_level_ack_pending(irq))
+ move_native_irq(irq);
+
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
@@ -1564,6 +1605,7 @@ static void ack_msi_irq(unsigned int irq
struct irq_desc *desc = irq_to_desc(irq);
irq_complete_move(&desc);
+ move_native_irq(irq);
if ( msi_maskable_irq(desc->msi_desc) )
ack_APIC_irq(); /* ACKTYPE_NONE */
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/irq.c Mon Oct 26 13:33:38 2009 +0000
@@ -453,6 +453,67 @@ void __setup_vector_irq(int cpu)
vector = irq_to_vector(irq);
per_cpu(vector_irq, cpu)[vector] = irq;
}
+}
+
+/*
+ * Carry out a deferred affinity change for @irq.
+ *
+ * Nothing happens unless IRQ_MOVE_PENDING is set in the descriptor status.
+ * Otherwise the flag is cleared; then, provided the pending mask is not
+ * empty and the handler implements set_affinity(), set_affinity() is
+ * called with the pending mask if that mask intersects cpu_online_map,
+ * and the pending mask is emptied.
+ *
+ * Correct operation relies on the caller having masked the interrupt.
+ */
+void move_masked_irq(int irq)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+        return;
+
+    /* The request is consumed here whether or not it can be honoured. */
+    desc->status &= ~IRQ_MOVE_PENDING;
+
+    /* No target CPUs recorded, or no way to retarget: nothing to do. */
+    if (unlikely(cpus_empty(desc->pending_mask)) ||
+        !desc->handler->set_affinity)
+        return;
+
+    /*
+     * With a valid mask available, the sequence is disable, re-program,
+     * enable.  This matters little for level-triggered interrupts, but
+     * for edge-triggered ones the RTE could otherwise be rewritten while
+     * an active trigger is arriving, which may upset some IO-APICs.
+     * The disable/enable part is the caller's responsibility.
+     */
+    if (likely(cpus_intersects(desc->pending_mask, cpu_online_map)))
+        desc->handler->set_affinity(irq, desc->pending_mask);
+
+    cpus_clear(desc->pending_mask);
+}
+
+/*
+ * Apply a pending affinity change to @irq, masking it around the update.
+ *
+ * Returns without doing anything when no move is pending or when the IRQ
+ * is marked IRQ_DISABLED.  Otherwise the handler's disable() hook is
+ * called, move_masked_irq() performs the move, and enable() is called.
+ */
+void move_native_irq(int irq)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    /* Go ahead only if a move is pending and the IRQ is not disabled. */
+    if ((desc->status & (IRQ_MOVE_PENDING | IRQ_DISABLED)) !=
+        IRQ_MOVE_PENDING)
+        return;
+
+    desc->handler->disable(irq);
+    move_masked_irq(irq);
+    desc->handler->enable(irq);
+}
+
+/*
+ * Request a new CPU affinity for @irq.
+ *
+ * The change is only recorded here: IRQ_MOVE_PENDING is set in the
+ * descriptor status and @mask is copied into desc->pending_mask.
+ * Handlers without a set_affinity() hook are left untouched.
+ * desc->lock must be held by the caller (checked by ASSERT).
+ */
+void irq_set_affinity(int irq, cpumask_t mask)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    if (desc->handler->set_affinity) {
+        ASSERT(spin_is_locked(&desc->lock));
+        desc->status |= IRQ_MOVE_PENDING;
+        cpus_copy(desc->pending_mask, mask);
+    }
}
asmlinkage void do_IRQ(struct cpu_user_regs *regs)
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/msi.c Mon Oct 26 13:33:38 2009 +0000
@@ -231,7 +231,6 @@ static void write_msi_msg(struct msi_des
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
- mask_msi_irq(entry->irq);
pci_conf_write32(bus, slot, func, msi_lower_address_reg(pos),
msg->address_lo);
if ( entry->msi_attrib.is_64 )
@@ -244,7 +243,6 @@ static void write_msi_msg(struct msi_des
else
pci_conf_write16(bus, slot, func, msi_data_reg(pos, 0),
msg->data);
- unmask_msi_irq(entry->irq);
break;
}
case PCI_CAP_ID_MSIX:
@@ -252,13 +250,11 @@ static void write_msi_msg(struct msi_des
void __iomem *base;
base = entry->mask_base;
- mask_msi_irq(entry->irq);
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
writel(msg->address_hi,
base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
- unmask_msi_irq(entry->irq);
break;
}
default:
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/include/asm-x86/io_apic.h
--- a/xen/include/asm-x86/io_apic.h Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/include/asm-x86/io_apic.h Mon Oct 26 13:33:38 2009 +0000
@@ -21,6 +21,15 @@
+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
#define IO_APIC_ID(idx) (mp_ioapics[idx].mpc_apicid)
+
+/* I/O Unit Redirection Table */
+#define IO_APIC_REDIR_VECTOR_MASK 0x000FF /* bits 0-7 */
+#define IO_APIC_REDIR_DEST_LOGICAL 0x00800 /* bit 11 set */
+#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000 /* bit 11 clear */
+#define IO_APIC_REDIR_SEND_PENDING (1 << 12) /* bit 12 */
+#define IO_APIC_REDIR_REMOTE_IRR (1 << 14) /* bit 14 */
+#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15) /* bit 15 */
+#define IO_APIC_REDIR_MASKED (1 << 16) /* bit 16 */
/*
* The structure of the IO-APIC:
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/include/asm-x86/irq.h Mon Oct 26 13:33:38 2009 +0000
@@ -138,6 +138,12 @@ int __assign_irq_vector(int irq, struct
int bind_irq_vector(int irq, int vector, cpumask_t domain);
+void move_native_irq(int irq);
+
+void move_masked_irq(int irq);
+
+void irq_set_affinity(int irq, cpumask_t mask);
+
#define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
#define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/include/xen/irq.h Mon Oct 26 13:33:38 2009 +0000
@@ -24,6 +24,7 @@ struct irqaction {
#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */
#define IRQ_GUEST 16 /* IRQ is handled by guest OS(es) */
#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
+#define IRQ_MOVE_PENDING 64 /* IRQ is migrating to another CPUs */
#define IRQ_PER_CPU 256 /* IRQ is per CPU */
/* Special IRQ numbers. */
@@ -75,6 +76,7 @@ typedef struct irq_desc {
int irq;
spinlock_t lock;
cpumask_t affinity;
+ cpumask_t pending_mask; /* IRQ migration pending mask */
/* irq ratelimit */
s_time_t rl_quantum_start;
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|