This is a proof-of-concept patch for guest irq ratelimiting. The main
motivation is to ensure Xen's responsiveness during an irq storm caused
by faulty hardware or drivers. If one interrupt fires at a high rate
(>10k per 10ms in the PoC), it is disabled until the next periodic
timer_fn. A global generation counter is used to minimize overhead.
Printks and configurability are not included in this patch. The
configurability may be:
1. command-line param to set ratelimit_threshold
2. per irq threshold
3. dynamic reconfig, maybe a hypercall or something
For preventing irq storms, #1 is likely enough, but I am not sure
whether this mechanism has other uses.
Any comments?
Thanks,
Qing
---
diff -r 8f81bdd57afe xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Thu Sep 03 09:51:37 2009 +0100
+++ b/xen/arch/x86/irq.c Wed Sep 09 16:23:19 2009 +0800
@@ -54,6 +54,12 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq)
};
DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs);
+
+static LIST_HEAD(irq_ratelimit_list); /* irq_descs currently throttled */
+static DEFINE_SPINLOCK(irq_ratelimit_lock); /* protects irq_ratelimit_list */
+static struct timer irq_ratelimit_timer; /* periodic (10ms) re-enable timer */
+static unsigned int irq_ratelimit_gen; /* global counting-window generation */
+static unsigned int irq_ratelimit_threshold = 10000; /* fires per window */
/* Must be called when irq disabled */
void lock_vector_lock(void)
@@ -241,6 +247,10 @@ static void init_one_irq_desc(struct irq
desc->msi_desc = NULL;
spin_lock_init(&desc->lock);
cpus_setall(desc->affinity);
+
+ desc->rl_gen = 0; /* not yet counted in any ratelimit window */
+ desc->rl_cnt = 0; /* no fires seen yet */
+ INIT_LIST_HEAD(&desc->rl_link); /* empty => not on irq_ratelimit_list */
}
static void init_one_irq_status(int irq)
@@ -469,6 +479,36 @@ asmlinkage void do_IRQ(struct cpu_user_r
if ( likely(desc->status & IRQ_GUEST) )
{
+        /*
+         * Rate-limit check: rl_cnt counts fires of this irq within the
+         * current generation (one period of the ratelimit timer).
+         * last_gen must be a plain automatic variable, not static:
+         * do_IRQ runs concurrently on every CPU and for every irq, so
+         * a single static copy would be shared across unrelated irqs
+         * and raced on by all CPUs.
+         */
+        unsigned int last_gen = desc->rl_gen;
+        desc->rl_gen = irq_ratelimit_gen;
+        if ( last_gen != desc->rl_gen ) {
+            desc->rl_cnt = 0;
+        }
+        if ( unlikely(desc->rl_cnt++ >= irq_ratelimit_threshold) ) {
+            desc->handler->disable(irq);
+            /*
+             * If handler->disable doesn't actually mask the interrupt,
+             * a disabled irq still can fire. So if the irq is already
+             * in the ratelimit list, don't add it again. This also
+             * avoids deadlocks of two spinlocks if ratelimit_timer_fn
+             * runs at the same time.
+             */
+            if ( likely(list_empty(&desc->rl_link)) ) {
+                spin_lock(&irq_ratelimit_lock);
+                list_add(&desc->rl_link, &irq_ratelimit_list);
+                spin_unlock(&irq_ratelimit_lock);
+            }
+            goto out;
+        }
+
irq_enter();
tsc_in = tb_init_done ? get_cycles() : 0;
__do_IRQ_guest(irq);
@@ -511,6 +544,53 @@ asmlinkage void do_IRQ(struct cpu_user_r
spin_unlock(&desc->lock);
set_irq_regs(old_regs);
}
+
+/*
+ * Periodic (10ms) timer: re-enable every irq that was throttled during
+ * the last period, and bump the global generation so that per-irq
+ * counters restart from zero.
+ */
+static void irq_ratelimit_timer_fn(void *data)
+{
+    struct irq_desc *desc, *tmp;
+    unsigned long flags;
+
+    irq_ratelimit_gen++;
+
+    /*
+     * do_IRQ takes irq_ratelimit_lock from interrupt context, so irqs
+     * must be disabled here: a storming irq arriving on this CPU while
+     * we hold the lock would otherwise deadlock.
+     */
+    spin_lock_irqsave(&irq_ratelimit_lock, flags);
+
+    list_for_each_entry_safe(desc, tmp, &irq_ratelimit_list, rl_link) {
+        spin_lock(&desc->lock);
+        desc->handler->enable(desc->irq);
+        list_del(&desc->rl_link);
+        /*
+         * Re-initialise the link so that do_IRQ's list_empty() check
+         * sees this irq as unlisted again; a bare list_del() would
+         * leave the node poisoned, and the irq could never be
+         * re-queued (hence never re-enabled) after a later storm.
+         */
+        INIT_LIST_HEAD(&desc->rl_link);
+        spin_unlock(&desc->lock);
+    }
+
+    spin_unlock_irqrestore(&irq_ratelimit_lock, flags);
+
+    set_timer(&irq_ratelimit_timer, NOW() + MILLISECS(10));
+}
+
+/* Arm the ratelimit timer at boot. */
+static int __init irq_ratelimit_init(void)
+{
+    init_timer(&irq_ratelimit_timer, irq_ratelimit_timer_fn, NULL, 0);
+    set_timer(&irq_ratelimit_timer, NOW() + MILLISECS(10));
+
+    return 0;
+}
+__initcall(irq_ratelimit_init);
int request_irq(unsigned int irq,
void (*handler)(int, void *, struct cpu_user_regs *),
diff -r 8f81bdd57afe xen/include/xen/irq.h
--- a/xen/include/xen/irq.h Thu Sep 03 09:51:37 2009 +0100
+++ b/xen/include/xen/irq.h Wed Sep 09 16:23:19 2009 +0800
@@ -74,6 +74,10 @@ typedef struct irq_desc {
int irq;
spinlock_t lock;
cpumask_t affinity;
+
+ unsigned int rl_gen; /* ratelimit generation at last fire */
+ unsigned int rl_cnt; /* fires seen within the current generation */
+ struct list_head rl_link; /* on irq_ratelimit_list while throttled */
} __cacheline_aligned irq_desc_t;
#if defined(__ia64__)
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|