Because an IRQ gets bound to the pCPU on which the binding vCPU happens
to be running at bind time, a considerable amount of extra cross-CPU
traffic can result as soon as that vCPU moves to a different pCPU.
Likewise, when a domain re-binds an event channel associated with a pIRQ
to a different vCPU, that IRQ's affinity should also be adjusted.
An open issue is how to break ties for interrupts shared by multiple
domains: currently, whichever request was made most recently (at any
point in time) is the one that gets honored.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
--- 2010-06-15.orig/xen/arch/ia64/xen/irq.c 2009-10-29 12:24:48.000000000 +0100
+++ 2010-06-15/xen/arch/ia64/xen/irq.c 2010-06-15 09:21:05.000000000 +0200
@@ -612,6 +612,11 @@ xen_debug_irq(unsigned long vector, stru
}
}
+void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *mask)
+{
+ /* FIXME */
+}
+
/*
* Exit an interrupt context. Process softirqs if needed and possible:
*/
--- 2010-06-15.orig/xen/arch/x86/hvm/hvm.c 2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/arch/x86/hvm/hvm.c 2010-06-15 09:21:05.000000000 +0200
@@ -270,7 +270,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
continue;
irq = desc - irq_desc;
ASSERT(MSI_IRQ(irq));
- irq_set_affinity(irq, *cpumask_of(v->processor));
+ irq_set_affinity(desc, cpumask_of(v->processor));
spin_unlock_irq(&desc->lock);
}
spin_unlock(&d->event_lock);
--- 2010-06-15.orig/xen/arch/x86/irq.c 2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/arch/x86/irq.c 2010-06-15 09:21:05.000000000 +0200
@@ -501,16 +501,28 @@ void move_native_irq(int irq)
}
/* For re-setting irq interrupt affinity for specific irq */
-void irq_set_affinity(int irq, cpumask_t mask)
+void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
if (!desc->handler->set_affinity)
return;
ASSERT(spin_is_locked(&desc->lock));
+ desc->status &= ~IRQ_MOVE_PENDING;
+ wmb();
+ cpus_copy(desc->pending_mask, *mask);
+ wmb();
desc->status |= IRQ_MOVE_PENDING;
- cpus_copy(desc->pending_mask, mask);
+}
+
+void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
+{
+ unsigned long flags;
+ struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
+
+ if ( !desc )
+ return;
+ irq_set_affinity(desc, mask);
+ spin_unlock_irqrestore(&desc->lock, flags);
}
DEFINE_PER_CPU(unsigned int, irq_count);
--- 2010-06-15.orig/xen/common/event_channel.c 2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/common/event_channel.c 2010-06-15 09:21:05.000000000 +0200
@@ -295,10 +295,36 @@ static long evtchn_bind_ipi(evtchn_bind_
}
+static void link_pirq_port(int port, struct evtchn *chn, struct vcpu *v)
+{
+ chn->u.pirq.prev_port = 0;
+ chn->u.pirq.next_port = v->pirq_evtchn_head;
+ if ( v->pirq_evtchn_head )
+ evtchn_from_port(v->domain, v->pirq_evtchn_head)
+ ->u.pirq.prev_port = port;
+ v->pirq_evtchn_head = port;
+}
+
+static void unlink_pirq_port(struct evtchn *chn, struct vcpu *v)
+{
+ struct domain *d = v->domain;
+
+ if ( chn->u.pirq.prev_port )
+ evtchn_from_port(d, chn->u.pirq.prev_port)->u.pirq.next_port =
+ chn->u.pirq.next_port;
+ else
+ v->pirq_evtchn_head = chn->u.pirq.next_port;
+ if ( chn->u.pirq.next_port )
+ evtchn_from_port(d, chn->u.pirq.next_port)->u.pirq.prev_port =
+ chn->u.pirq.prev_port;
+}
+
+
static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
{
struct evtchn *chn;
struct domain *d = current->domain;
+ struct vcpu *v = d->vcpu[0];
int port, pirq = bind->pirq;
long rc;
@@ -319,7 +345,7 @@ static long evtchn_bind_pirq(evtchn_bind
chn = evtchn_from_port(d, port);
d->pirq_to_evtchn[pirq] = port;
- rc = pirq_guest_bind(d->vcpu[0], pirq,
+ rc = pirq_guest_bind(v, pirq,
!!(bind->flags & BIND_PIRQ__WILL_SHARE));
if ( rc != 0 )
{
@@ -328,7 +354,8 @@ static long evtchn_bind_pirq(evtchn_bind
}
chn->state = ECS_PIRQ;
- chn->u.pirq = pirq;
+ chn->u.pirq.irq = pirq;
+ link_pirq_port(port, chn, v);
bind->port = port;
@@ -376,8 +403,9 @@ static long __evtchn_close(struct domain
break;
case ECS_PIRQ:
- pirq_guest_unbind(d1, chn1->u.pirq);
- d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+ pirq_guest_unbind(d1, chn1->u.pirq.irq);
+ d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0;
+ unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
break;
case ECS_VIRQ:
@@ -688,7 +716,7 @@ static long evtchn_status(evtchn_status_
break;
case ECS_PIRQ:
status->status = EVTCHNSTAT_pirq;
- status->u.pirq = chn->u.pirq;
+ status->u.pirq = chn->u.pirq.irq;
break;
case ECS_VIRQ:
status->status = EVTCHNSTAT_virq;
@@ -747,8 +775,16 @@ long evtchn_bind_vcpu(unsigned int port,
break;
case ECS_UNBOUND:
case ECS_INTERDOMAIN:
+ chn->notify_vcpu_id = vcpu_id;
+ break;
case ECS_PIRQ:
+ if ( chn->notify_vcpu_id == vcpu_id )
+ break;
+ unlink_pirq_port(chn, d->vcpu[chn->notify_vcpu_id]);
chn->notify_vcpu_id = vcpu_id;
+ pirq_set_affinity(d, chn->u.pirq.irq,
+ cpumask_of(d->vcpu[vcpu_id]->processor));
+ link_pirq_port(port, chn, d->vcpu[vcpu_id]);
break;
default:
rc = -EINVAL;
@@ -1064,6 +1100,23 @@ void evtchn_destroy_final(struct domain
}
+void evtchn_move_pirqs(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ const cpumask_t *mask = cpumask_of(v->processor);
+ unsigned int port;
+ struct evtchn *chn;
+
+ spin_lock(&d->event_lock);
+ for ( port = v->pirq_evtchn_head; port; port = chn->u.pirq.next_port )
+ {
+ chn = evtchn_from_port(d, port);
+ pirq_set_affinity(d, chn->u.pirq.irq, mask);
+ }
+ spin_unlock(&d->event_lock);
+}
+
+
static void domain_dump_evtchn_info(struct domain *d)
{
unsigned int port;
@@ -1105,7 +1158,7 @@ static void domain_dump_evtchn_info(stru
chn->u.interdomain.remote_port);
break;
case ECS_PIRQ:
- printk(" p=%d", chn->u.pirq);
+ printk(" p=%d", chn->u.pirq.irq);
break;
case ECS_VIRQ:
printk(" v=%d", chn->u.virq);
--- 2010-06-15.orig/xen/common/schedule.c 2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/common/schedule.c 2010-06-15 09:21:05.000000000 +0200
@@ -272,6 +272,7 @@ int sched_move_domain(struct domain *d,
cpus_setall(v->cpu_affinity);
v->processor = new_p;
v->sched_priv = vcpu_priv[v->vcpu_id];
+ evtchn_move_pirqs(v);
new_p = cycle_cpu(new_p, c->cpu_valid);
}
@@ -419,6 +420,9 @@ static void vcpu_migrate(struct vcpu *v)
spin_unlock_irqrestore(
per_cpu(schedule_data, old_cpu).schedule_lock, flags);
+ if ( old_cpu != new_cpu )
+ evtchn_move_pirqs(v);
+
/* Wake on new CPU. */
vcpu_wake(v);
}
@@ -1094,6 +1098,9 @@ static void schedule(void)
stop_timer(&prev->periodic_timer);
+ if ( next_slice.migrated )
+ evtchn_move_pirqs(next);
+
/* Ensure that the domain has an up-to-date time base. */
update_vcpu_system_time(next);
vcpu_periodic_timer_work(next);
--- 2010-06-15.orig/xen/common/sched_credit.c 2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_credit.c 2010-06-15 09:21:05.000000000 +0200
@@ -1168,7 +1168,7 @@ csched_runq_steal(int peer_cpu, int cpu,
static struct csched_vcpu *
csched_load_balance(struct csched_private *prv, int cpu,
- struct csched_vcpu *snext)
+ struct csched_vcpu *snext, bool_t *stolen)
{
struct csched_vcpu *speer;
cpumask_t workers;
@@ -1221,7 +1221,10 @@ csched_load_balance(struct csched_privat
speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
spin_unlock(per_cpu(schedule_data, peer_cpu).schedule_lock);
if ( speer != NULL )
+ {
+ *stolen = 1;
return speer;
+ }
}
out:
@@ -1269,6 +1272,7 @@ csched_schedule(
BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
snext = __runq_elem(runq->next);
+ ret.migrated = 0;
/* Tasklet work (which runs in idle VCPU context) overrides all else. */
if ( tasklet_work_scheduled )
@@ -1288,7 +1292,7 @@ csched_schedule(
if ( snext->pri > CSCHED_PRI_TS_OVER )
__runq_remove(snext);
else
- snext = csched_load_balance(prv, cpu, snext);
+ snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
/*
* Update idlers mask if necessary. When we're idling, other CPUs
--- 2010-06-15.orig/xen/common/sched_credit2.c 2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_credit2.c 2010-06-15 09:22:13.000000000 +0200
@@ -991,10 +991,17 @@ csched_schedule(
}
#endif
+ ret.migrated = 0;
+
if ( !is_idle_vcpu(snext->vcpu) )
{
snext->start_time = now;
- snext->vcpu->processor = cpu; /* Safe because lock for old processor is held */
+ /* Safe because lock for old processor is held */
+ if ( snext->vcpu->processor != cpu )
+ {
+ snext->vcpu->processor = cpu;
+ ret.migrated = 1;
+ }
}
/*
--- 2010-06-15.orig/xen/common/sched_sedf.c 2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_sedf.c 2010-06-15 09:21:05.000000000 +0200
@@ -875,6 +875,8 @@ static struct task_slice sedf_do_schedul
ret.time = EXTRA_QUANTUM;
}
+ ret.migrated = 0;
+
EDOM_INFO(ret.task)->sched_start_abs = now;
CHECK(ret.time > 0);
ASSERT(sedf_runnable(ret.task));
--- 2010-06-15.orig/xen/include/asm-x86/irq.h 2009-12-16 09:14:13.000000000 +0100
+++ 2010-06-15/xen/include/asm-x86/irq.h 2010-06-15 09:21:05.000000000 +0200
@@ -143,7 +143,7 @@ void move_native_irq(int irq);
void move_masked_irq(int irq);
-void irq_set_affinity(int irq, cpumask_t mask);
+void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);
#define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
#define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
--- 2010-06-15.orig/xen/include/xen/cpumask.h 2010-05-17 08:45:28.000000000 +0200
+++ 2010-06-15/xen/include/xen/cpumask.h 2010-06-15 09:21:05.000000000 +0200
@@ -206,7 +206,7 @@ static inline int __cpus_weight(const cp
}
#define cpus_copy(dest, src) __cpus_copy(&(dest), &(src))
-static inline void __cpus_copy(cpumask_t *dstp, cpumask_t *srcp)
+static inline void __cpus_copy(cpumask_t *dstp, const cpumask_t *srcp)
{
bitmap_copy(dstp->bits, srcp->bits, NR_CPUS);
}
--- 2010-06-15.orig/xen/include/xen/event.h 2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/include/xen/event.h 2010-06-15 09:21:05.000000000 +0200
@@ -47,6 +47,9 @@ long evtchn_bind_vcpu(unsigned int port,
/* Unmask a local event-channel port. */
int evtchn_unmask(unsigned int port);
+/* Move all PIRQs after a vCPU was moved to another pCPU. */
+void evtchn_move_pirqs(struct vcpu *v);
+
/* Allocate/free a Xen-attached event channel port. */
int alloc_unbound_xen_event_channel(
struct vcpu *local_vcpu, domid_t remote_domid);
--- 2010-06-15.orig/xen/include/xen/irq.h 2009-10-29 12:24:49.000000000 +0100
+++ 2010-06-15/xen/include/xen/irq.h 2010-06-15 09:21:05.000000000 +0200
@@ -138,6 +138,7 @@ extern int pirq_guest_eoi(struct domain
extern int pirq_guest_unmask(struct domain *d);
extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
extern void pirq_guest_unbind(struct domain *d, int irq);
+extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *);
extern irq_desc_t *domain_spin_lock_irq_desc(
struct domain *d, int irq, unsigned long *pflags);
--- 2010-06-15.orig/xen/include/xen/sched.h 2010-06-14 08:49:36.000000000 +0200
+++ 2010-06-15/xen/include/xen/sched.h 2010-06-15 09:21:05.000000000 +0200
@@ -61,7 +61,11 @@ struct evtchn
u16 remote_port;
struct domain *remote_dom;
} interdomain; /* state == ECS_INTERDOMAIN */
- u16 pirq; /* state == ECS_PIRQ */
+ struct {
+ u16 irq;
+ u16 next_port;
+ u16 prev_port;
+ } pirq; /* state == ECS_PIRQ */
u16 virq; /* state == ECS_VIRQ */
} u;
#ifdef FLASK_ENABLE
@@ -142,6 +146,9 @@ struct vcpu
*/
int poll_evtchn;
+ /* (over-)protected by ->domain->event_lock */
+ int pirq_evtchn_head;
+
unsigned long pause_flags;
atomic_t pause_count;
--- 2010-06-15.orig/xen/include/xen/sched-if.h 2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/include/xen/sched-if.h 2010-06-15 09:21:05.000000000 +0200
@@ -79,6 +79,7 @@ static inline void vcpu_schedule_unlock(
struct task_slice {
struct vcpu *task;
s_time_t time;
+ bool_t migrated;
};
struct scheduler {
guest-irq-affinity.patch
Description: Text document
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|