To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86: IRQ affinity should track vCPU affinity
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 21 Jun 2010 10:45:10 -0700
Delivery-date: Mon, 21 Jun 2010 10:46:13 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1276755726 -3600
# Node ID 0695a5cdcb42d98dcd4bbda35614753787aa7983
# Parent  b9c541d9c13822e92719ccfe77fbd0241410f2c5
x86: IRQ affinity should track vCPU affinity

With IRQs getting bound to the pCPU that the binding vCPU happens to
run on, quite a bit of extra cross-CPU traffic can result as soon as
that vCPU moves to a different pCPU. Likewise, when a domain re-binds
an event channel associated with a pIRQ, that IRQ's affinity should
also be adjusted.

The open issue is how to break ties for interrupts shared by multiple
domains: currently, the most recent request (at any point in time) is
honored.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/ia64/xen/irq.c    |    5 +++
 xen/arch/x86/hvm/hvm.c     |    2 -
 xen/arch/x86/irq.c         |   22 +++++++++++----
 xen/common/event_channel.c |   65 ++++++++++++++++++++++++++++++++++++++++-----
 xen/common/sched_credit.c  |    8 ++++-
 xen/common/sched_credit2.c |    9 +++++-
 xen/common/sched_sedf.c    |    2 +
 xen/common/schedule.c      |    7 ++++
 xen/include/asm-x86/irq.h  |    2 -
 xen/include/xen/cpumask.h  |    2 -
 xen/include/xen/event.h    |    3 ++
 xen/include/xen/irq.h      |    1 +
 xen/include/xen/sched-if.h |    1 +
 xen/include/xen/sched.h    |    9 +++++-
 14 files changed, 120 insertions(+), 18 deletions(-)
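
As context for the diff below: the core of the change is a per-vCPU list of
pIRQ-bound event-channel ports (pirq_evtchn_head plus the u.pirq.prev_port /
u.pirq.next_port links), which evtchn_move_pirqs() walks to re-target each
IRQ's affinity whenever the vCPU ends up on a different pCPU. The following
standalone C sketch models only that list handling; it is not Xen code, and
the fixed-size port table, the single implicit vCPU, and the printf stand-in
for pirq_set_affinity() are simplifications made up for illustration.

/*
 * Standalone sketch (not Xen code) of the per-vCPU pIRQ event-channel
 * list added by this patch.  Port 0 acts as the "no port" terminator,
 * as in the patch.
 */
#include <stdio.h>

#define NR_PORTS 8

struct evtchn { int irq; int prev_port; int next_port; };

static struct evtchn port_table[NR_PORTS];   /* index == port number     */
static int pirq_evtchn_head;                 /* per-vCPU head, 0 == none */

/* Prepend a pIRQ-bound port to the vCPU's list (cf. link_pirq_port()). */
static void link_pirq_port(int port)
{
    port_table[port].prev_port = 0;
    port_table[port].next_port = pirq_evtchn_head;
    if (pirq_evtchn_head)
        port_table[pirq_evtchn_head].prev_port = port;
    pirq_evtchn_head = port;
}

/* Remove a port from the list when it is closed or re-bound. */
static void unlink_pirq_port(int port)
{
    struct evtchn *chn = &port_table[port];

    if (chn->prev_port)
        port_table[chn->prev_port].next_port = chn->next_port;
    else
        pirq_evtchn_head = chn->next_port;
    if (chn->next_port)
        port_table[chn->next_port].prev_port = chn->prev_port;
}

/* Stand-in for evtchn_move_pirqs()/pirq_set_affinity(): walk the list
 * and report which IRQs would be re-targeted to the new pCPU. */
static void move_pirqs_to_cpu(int cpu)
{
    for (int port = pirq_evtchn_head; port; port = port_table[port].next_port)
        printf("retarget IRQ %d (port %d) to CPU %d\n",
               port_table[port].irq, port, cpu);
}

int main(void)
{
    port_table[3].irq = 16; link_pirq_port(3);   /* bind pIRQ 16 to port 3 */
    port_table[5].irq = 17; link_pirq_port(5);   /* bind pIRQ 17 to port 5 */

    move_pirqs_to_cpu(2);    /* vCPU migrated to pCPU 2: both IRQs move */
    unlink_pirq_port(3);     /* port 3 closed or re-bound elsewhere     */
    move_pirqs_to_cpu(4);    /* only pIRQ 17 follows the vCPU now       */
    return 0;
}

Running the sketch prints the IRQs that would be re-targeted after a simulated
migration, before and after one port is unlinked; the real code does the same
walk under d->event_lock and calls pirq_set_affinity() for each entry.
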

diff -r b9c541d9c138 -r 0695a5cdcb42 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/arch/ia64/xen/irq.c   Thu Jun 17 07:22:06 2010 +0100
@@ -612,6 +612,11 @@ xen_debug_irq(unsigned long vector, stru
        }
 }
 
+void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *mask)
+{
+       /* FIXME */
+}
+
 /*
  * Exit an interrupt context. Process softirqs if needed and possible:
  */
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Thu Jun 17 07:22:06 2010 +0100
@@ -271,7 +271,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
             continue;
         irq = desc - irq_desc;
         ASSERT(MSI_IRQ(irq));
-        irq_set_affinity(irq, *cpumask_of(v->processor));
+        irq_set_affinity(desc, cpumask_of(v->processor));
         spin_unlock_irq(&desc->lock);
     }
     spin_unlock(&d->event_lock);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/arch/x86/irq.c        Thu Jun 17 07:22:06 2010 +0100
@@ -501,16 +501,28 @@ void move_native_irq(int irq)
 }
 
 /* For re-setting irq interrupt affinity for specific irq */
-void irq_set_affinity(int irq, cpumask_t mask)
-{
-    struct irq_desc *desc = irq_to_desc(irq);
-    
+void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
+{
     if (!desc->handler->set_affinity)
         return;
     
     ASSERT(spin_is_locked(&desc->lock));
+    desc->status &= ~IRQ_MOVE_PENDING;
+    wmb();
+    cpus_copy(desc->pending_mask, *mask);
+    wmb();
     desc->status |= IRQ_MOVE_PENDING;
-    cpus_copy(desc->pending_mask, mask);
+}
+
+void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
+{
+    unsigned long flags;
+    struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
+
+    if ( !desc )
+        return;
+    irq_set_affinity(desc, mask);
+    spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 DEFINE_PER_CPU(unsigned int, irq_count);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/event_channel.c        Thu Jun 17 07:22:06 2010 +0100
@@ -295,10 +295,36 @@ static long evtchn_bind_ipi(evtchn_bind_
 }
 
 
+static void link_pirq_port(int port, struct evtchn *chn, struct vcpu *v)
+{
+    chn->u.pirq.prev_port = 0;
+    chn->u.pirq.next_port = v->pirq_evtchn_head;
+    if ( v->pirq_evtchn_head )
+        evtchn_from_port(v->domain, v->pirq_evtchn_head)
+            ->u.pirq.prev_port = port;
+    v->pirq_evtchn_head = port;
+}
+
+static void unlink_pirq_port(struct evtchn *chn, struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    if ( chn->u.pirq.prev_port )
+        evtchn_from_port(d, chn->u.pirq.prev_port)->u.pirq.next_port =
+            chn->u.pirq.next_port;
+    else
+        v->pirq_evtchn_head = chn->u.pirq.next_port;
+    if ( chn->u.pirq.next_port )
+        evtchn_from_port(d, chn->u.pirq.next_port)->u.pirq.prev_port =
+            chn->u.pirq.prev_port;
+}
+
+
 static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
 {
     struct evtchn *chn;
     struct domain *d = current->domain;
+    struct vcpu   *v = d->vcpu[0];
     int            port, pirq = bind->pirq;
     long           rc;
 
@@ -319,7 +345,7 @@ static long evtchn_bind_pirq(evtchn_bind
     chn = evtchn_from_port(d, port);
 
     d->pirq_to_evtchn[pirq] = port;
-    rc = pirq_guest_bind(d->vcpu[0], pirq, 
+    rc = pirq_guest_bind(v, pirq,
                          !!(bind->flags & BIND_PIRQ__WILL_SHARE));
     if ( rc != 0 )
     {
@@ -328,7 +354,8 @@ static long evtchn_bind_pirq(evtchn_bind
     }
 
     chn->state  = ECS_PIRQ;
-    chn->u.pirq = pirq;
+    chn->u.pirq.irq = pirq;
+    link_pirq_port(port, chn, v);
 
     bind->port = port;
 
@@ -376,8 +403,9 @@ static long __evtchn_close(struct domain
         break;
 
     case ECS_PIRQ:
-        pirq_guest_unbind(d1, chn1->u.pirq);
-        d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+        pirq_guest_unbind(d1, chn1->u.pirq.irq);
+        d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0;
+        unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
         break;
 
     case ECS_VIRQ:
@@ -688,7 +716,7 @@ static long evtchn_status(evtchn_status_
         break;
     case ECS_PIRQ:
         status->status = EVTCHNSTAT_pirq;
-        status->u.pirq = chn->u.pirq;
+        status->u.pirq = chn->u.pirq.irq;
         break;
     case ECS_VIRQ:
         status->status = EVTCHNSTAT_virq;
@@ -747,8 +775,16 @@ long evtchn_bind_vcpu(unsigned int port,
         break;
     case ECS_UNBOUND:
     case ECS_INTERDOMAIN:
+        chn->notify_vcpu_id = vcpu_id;
+        break;
     case ECS_PIRQ:
+        if ( chn->notify_vcpu_id == vcpu_id )
+            break;
+        unlink_pirq_port(chn, d->vcpu[chn->notify_vcpu_id]);
         chn->notify_vcpu_id = vcpu_id;
+        pirq_set_affinity(d, chn->u.pirq.irq,
+                          cpumask_of(d->vcpu[vcpu_id]->processor));
+        link_pirq_port(port, chn, d->vcpu[vcpu_id]);
         break;
     default:
         rc = -EINVAL;
@@ -1061,6 +1097,23 @@ void evtchn_destroy_final(struct domain 
     xfree(d->poll_mask);
     d->poll_mask = NULL;
 #endif
+}
+
+
+void evtchn_move_pirqs(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    const cpumask_t *mask = cpumask_of(v->processor);
+    unsigned int port;
+    struct evtchn *chn;
+
+    spin_lock(&d->event_lock);
+    for ( port = v->pirq_evtchn_head; port; port = chn->u.pirq.next_port )
+    {
+        chn = evtchn_from_port(d, port);
+        pirq_set_affinity(d, chn->u.pirq.irq, mask);
+    }
+    spin_unlock(&d->event_lock);
 }
 
 
@@ -1105,7 +1158,7 @@ static void domain_dump_evtchn_info(stru
                    chn->u.interdomain.remote_port);
             break;
         case ECS_PIRQ:
-            printk(" p=%d", chn->u.pirq);
+            printk(" p=%d", chn->u.pirq.irq);
             break;
         case ECS_VIRQ:
             printk(" v=%d", chn->u.virq);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/sched_credit.c Thu Jun 17 07:22:06 2010 +0100
@@ -1168,7 +1168,7 @@ csched_runq_steal(int peer_cpu, int cpu,
 
 static struct csched_vcpu *
 csched_load_balance(struct csched_private *prv, int cpu,
-    struct csched_vcpu *snext)
+    struct csched_vcpu *snext, bool_t *stolen)
 {
     struct csched_vcpu *speer;
     cpumask_t workers;
@@ -1221,7 +1221,10 @@ csched_load_balance(struct csched_privat
         speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
         spin_unlock(per_cpu(schedule_data, peer_cpu).schedule_lock);
         if ( speer != NULL )
+        {
+            *stolen = 1;
             return speer;
+        }
     }
 
  out:
@@ -1269,6 +1272,7 @@ csched_schedule(
         BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
 
     snext = __runq_elem(runq->next);
+    ret.migrated = 0;
 
     /* Tasklet work (which runs in idle VCPU context) overrides all else. */
     if ( tasklet_work_scheduled )
@@ -1288,7 +1292,7 @@ csched_schedule(
     if ( snext->pri > CSCHED_PRI_TS_OVER )
         __runq_remove(snext);
     else
-        snext = csched_load_balance(prv, cpu, snext);
+        snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
 
     /*
      * Update idlers mask if necessary. When we're idling, other CPUs
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/sched_credit2.c
--- a/xen/common/sched_credit2.c        Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/sched_credit2.c        Thu Jun 17 07:22:06 2010 +0100
@@ -991,10 +991,17 @@ csched_schedule(
     }
 #endif
 
+    ret.migrated = 0;
+
     if ( !is_idle_vcpu(snext->vcpu) )
     {
         snext->start_time = now;
-        snext->vcpu->processor = cpu; /* Safe because lock for old processor is held */
+        /* Safe because lock for old processor is held */
+        if ( snext->vcpu->processor != cpu )
+        {
+            snext->vcpu->processor = cpu;
+            ret.migrated = 1;
+        }
     }
 
     /*
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/sched_sedf.c   Thu Jun 17 07:22:06 2010 +0100
@@ -874,6 +874,8 @@ static struct task_slice sedf_do_schedul
                ret.time);
         ret.time = EXTRA_QUANTUM;
     }
+
+    ret.migrated = 0;
 
     EDOM_INFO(ret.task)->sched_start_abs = now;
     CHECK(ret.time > 0);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/schedule.c
--- a/xen/common/schedule.c     Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/schedule.c     Thu Jun 17 07:22:06 2010 +0100
@@ -272,6 +272,7 @@ int sched_move_domain(struct domain *d, 
         cpus_setall(v->cpu_affinity);
         v->processor = new_p;
         v->sched_priv = vcpu_priv[v->vcpu_id];
+        evtchn_move_pirqs(v);
 
         new_p = cycle_cpu(new_p, c->cpu_valid);
     }
@@ -418,6 +419,9 @@ static void vcpu_migrate(struct vcpu *v)
     v->processor = new_cpu;
     spin_unlock_irqrestore(
         per_cpu(schedule_data, old_cpu).schedule_lock, flags);
+
+    if ( old_cpu != new_cpu )
+        evtchn_move_pirqs(v);
 
     /* Wake on new CPU. */
     vcpu_wake(v);
@@ -1094,6 +1098,9 @@ static void schedule(void)
 
     stop_timer(&prev->periodic_timer);
 
+    if ( next_slice.migrated )
+        evtchn_move_pirqs(next);
+
     /* Ensure that the domain has an up-to-date time base. */
     update_vcpu_system_time(next);
     vcpu_periodic_timer_work(next);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/asm-x86/irq.h Thu Jun 17 07:22:06 2010 +0100
@@ -143,7 +143,7 @@ void move_native_irq(int irq);
 
 void move_masked_irq(int irq);
 
-void irq_set_affinity(int irq, cpumask_t mask);
+void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);
 
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
 #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/cpumask.h Thu Jun 17 07:22:06 2010 +0100
@@ -206,7 +206,7 @@ static inline int __cpus_weight(const cp
 }
 
 #define cpus_copy(dest, src) __cpus_copy(&(dest), &(src))
-static inline void __cpus_copy(cpumask_t *dstp, cpumask_t *srcp)
+static inline void __cpus_copy(cpumask_t *dstp, const cpumask_t *srcp)
 {
        bitmap_copy(dstp->bits, srcp->bits, NR_CPUS);
 }
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/event.h
--- a/xen/include/xen/event.h   Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/event.h   Thu Jun 17 07:22:06 2010 +0100
@@ -47,6 +47,9 @@ long evtchn_bind_vcpu(unsigned int port,
 /* Unmask a local event-channel port. */
 int evtchn_unmask(unsigned int port);
 
+/* Move all PIRQs after a vCPU was moved to another pCPU. */
+void evtchn_move_pirqs(struct vcpu *v);
+
 /* Allocate/free a Xen-attached event channel port. */
 int alloc_unbound_xen_event_channel(
     struct vcpu *local_vcpu, domid_t remote_domid);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h     Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/irq.h     Thu Jun 17 07:22:06 2010 +0100
@@ -139,6 +139,7 @@ extern int pirq_guest_unmask(struct doma
 extern int pirq_guest_unmask(struct domain *d);
 extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
 extern void pirq_guest_unbind(struct domain *d, int irq);
+extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *);
 extern irq_desc_t *domain_spin_lock_irq_desc(
     struct domain *d, int irq, unsigned long *pflags);
 
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/sched-if.h        Thu Jun 17 07:22:06 2010 +0100
@@ -79,6 +79,7 @@ struct task_slice {
 struct task_slice {
     struct vcpu *task;
     s_time_t     time;
+    bool_t       migrated;
 };
 
 struct scheduler {
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/sched.h   Thu Jun 17 07:22:06 2010 +0100
@@ -61,7 +61,11 @@ struct evtchn
             u16            remote_port;
             struct domain *remote_dom;
         } interdomain; /* state == ECS_INTERDOMAIN */
-        u16 pirq;      /* state == ECS_PIRQ */
+        struct {
+            u16            irq;
+            u16            next_port;
+            u16            prev_port;
+        } pirq;        /* state == ECS_PIRQ */
         u16 virq;      /* state == ECS_VIRQ */
     } u;
 #ifdef FLASK_ENABLE
@@ -141,6 +145,9 @@ struct vcpu
      * < 0: multiple ports may be being polled.
      */
     int              poll_evtchn;
+
+    /* (over-)protected by ->domain->event_lock */
+    int              pirq_evtchn_head;
 
     unsigned long    pause_flags;
     atomic_t         pause_count;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
