[Xen-devel] [PATCH, RFC] x86: IRQ affinity should track vCPU affinity

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH, RFC] x86: IRQ affinity should track vCPU affinity
From: "Jan Beulich" <JBeulich@xxxxxxxxxx>
Date: Tue, 15 Jun 2010 13:28:02 +0100
Delivery-date: Tue, 15 Jun 2010 05:28:56 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
With IRQs getting bound to the pCPU the binding vCPU happens to be
running on, quite a bit of extra cross-CPU traffic can result as soon
as that vCPU moves to a different pCPU, so the IRQ's affinity should
follow the vCPU. Likewise, when a domain re-binds an event channel
associated with a pIRQ to another vCPU, that IRQ's affinity should
also be adjusted.

The open issue is how to break ties for interrupts shared by multiple
domains - currently, the most recent request (at any point in time)
is honored.
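
For illustration only, here is a minimal, self-contained sketch of the
intended flow (not part of the patch; the structures, ports and IRQ
numbers below are made-up stand-ins for the real Xen ones): each vCPU
keeps a list of the event channel ports bound to pIRQs, and whenever
the vCPU lands on a different pCPU, the affinity of every such IRQ is
re-pointed at that pCPU, mirroring what evtchn_move_pirqs() does in
the patch.

/* sketch.c - simplified model, not hypervisor code */
#include <stdio.h>

#define MAX_PORTS 8

struct evtchn {
    int irq;        /* pIRQ this port is bound to */
    int next_port;  /* next port in the per-vCPU pIRQ list (0 = end) */
};

struct vcpu {
    int processor;        /* pCPU the vCPU currently runs on */
    int pirq_evtchn_head; /* head of the pIRQ-bound port list */
};

static struct evtchn port_table[MAX_PORTS];

/* Stand-in for pirq_set_affinity(): just report the new binding. */
static void pirq_set_affinity(int irq, int cpu)
{
    printf("IRQ %d -> pCPU %d\n", irq, cpu);
}

/* Analogue of evtchn_move_pirqs(): walk the vCPU's pIRQ port list. */
static void evtchn_move_pirqs(struct vcpu *v)
{
    int port;

    for ( port = v->pirq_evtchn_head; port; port = port_table[port].next_port )
        pirq_set_affinity(port_table[port].irq, v->processor);
}

int main(void)
{
    struct vcpu v = { .processor = 0, .pirq_evtchn_head = 3 };

    /* Two pIRQ-bound ports, linked 3 -> 5 -> end. */
    port_table[3] = (struct evtchn){ .irq = 24, .next_port = 5 };
    port_table[5] = (struct evtchn){ .irq = 33, .next_port = 0 };

    v.processor = 2;       /* the scheduler moved the vCPU to pCPU 2 ... */
    evtchn_move_pirqs(&v); /* ... so both IRQs follow it there */
    return 0;
}

In the real code the list lives in struct evtchn's pirq union member
and is protected by the domain's event_lock, and pirq_set_affinity()
only marks the descriptor with IRQ_MOVE_PENDING plus the target mask;
the actual retargeting is performed later (see move_native_irq() in
the x86 hunk).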

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

--- 2010-06-15.orig/xen/arch/ia64/xen/irq.c     2009-10-29 12:24:48.000000000 +0100
+++ 2010-06-15/xen/arch/ia64/xen/irq.c  2010-06-15 09:21:05.000000000 +0200
@@ -612,6 +612,11 @@ xen_debug_irq(unsigned long vector, stru
        }
 }
 
+void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *mask)
+{
+       /* FIXME */
+}
+
 /*
  * Exit an interrupt context. Process softirqs if needed and possible:
  */
--- 2010-06-15.orig/xen/arch/x86/hvm/hvm.c      2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/arch/x86/hvm/hvm.c   2010-06-15 09:21:05.000000000 +0200
@@ -270,7 +270,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
             continue;
         irq = desc - irq_desc;
         ASSERT(MSI_IRQ(irq));
-        irq_set_affinity(irq, *cpumask_of(v->processor));
+        irq_set_affinity(desc, cpumask_of(v->processor));
         spin_unlock_irq(&desc->lock);
     }
     spin_unlock(&d->event_lock);
--- 2010-06-15.orig/xen/arch/x86/irq.c  2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/arch/x86/irq.c       2010-06-15 09:21:05.000000000 +0200
@@ -501,16 +501,28 @@ void move_native_irq(int irq)
 }
 
 /* For re-setting irq interrupt affinity for specific irq */
-void irq_set_affinity(int irq, cpumask_t mask)
+void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
 {
-    struct irq_desc *desc = irq_to_desc(irq);
-    
     if (!desc->handler->set_affinity)
         return;
     
     ASSERT(spin_is_locked(&desc->lock));
+    desc->status &= ~IRQ_MOVE_PENDING;
+    wmb();
+    cpus_copy(desc->pending_mask, *mask);
+    wmb();
     desc->status |= IRQ_MOVE_PENDING;
-    cpus_copy(desc->pending_mask, mask);
+}
+
+void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
+{
+    unsigned long flags;
+    struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
+
+    if ( !desc )
+        return;
+    irq_set_affinity(desc, mask);
+    spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 DEFINE_PER_CPU(unsigned int, irq_count);
--- 2010-06-15.orig/xen/common/event_channel.c  2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/common/event_channel.c       2010-06-15 09:21:05.000000000 +0200
@@ -295,10 +295,36 @@ static long evtchn_bind_ipi(evtchn_bind_
 }
 
 
+static void link_pirq_port(int port, struct evtchn *chn, struct vcpu *v)
+{
+    chn->u.pirq.prev_port = 0;
+    chn->u.pirq.next_port = v->pirq_evtchn_head;
+    if ( v->pirq_evtchn_head )
+        evtchn_from_port(v->domain, v->pirq_evtchn_head)
+            ->u.pirq.prev_port = port;
+    v->pirq_evtchn_head = port;
+}
+
+static void unlink_pirq_port(struct evtchn *chn, struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    if ( chn->u.pirq.prev_port )
+        evtchn_from_port(d, chn->u.pirq.prev_port)->u.pirq.next_port =
+            chn->u.pirq.next_port;
+    else
+        v->pirq_evtchn_head = chn->u.pirq.next_port;
+    if ( chn->u.pirq.next_port )
+        evtchn_from_port(d, chn->u.pirq.next_port)->u.pirq.prev_port =
+            chn->u.pirq.prev_port;
+}
+
+
 static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
 {
     struct evtchn *chn;
     struct domain *d = current->domain;
+    struct vcpu   *v = d->vcpu[0];
     int            port, pirq = bind->pirq;
     long           rc;
 
@@ -319,7 +345,7 @@ static long evtchn_bind_pirq(evtchn_bind
     chn = evtchn_from_port(d, port);
 
     d->pirq_to_evtchn[pirq] = port;
-    rc = pirq_guest_bind(d->vcpu[0], pirq, 
+    rc = pirq_guest_bind(v, pirq,
                          !!(bind->flags & BIND_PIRQ__WILL_SHARE));
     if ( rc != 0 )
     {
@@ -328,7 +354,8 @@ static long evtchn_bind_pirq(evtchn_bind
     }
 
     chn->state  = ECS_PIRQ;
-    chn->u.pirq = pirq;
+    chn->u.pirq.irq = pirq;
+    link_pirq_port(port, chn, v);
 
     bind->port = port;
 
@@ -376,8 +403,9 @@ static long __evtchn_close(struct domain
         break;
 
     case ECS_PIRQ:
-        pirq_guest_unbind(d1, chn1->u.pirq);
-        d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+        pirq_guest_unbind(d1, chn1->u.pirq.irq);
+        d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0;
+        unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
         break;
 
     case ECS_VIRQ:
@@ -688,7 +716,7 @@ static long evtchn_status(evtchn_status_
         break;
     case ECS_PIRQ:
         status->status = EVTCHNSTAT_pirq;
-        status->u.pirq = chn->u.pirq;
+        status->u.pirq = chn->u.pirq.irq;
         break;
     case ECS_VIRQ:
         status->status = EVTCHNSTAT_virq;
@@ -747,8 +775,16 @@ long evtchn_bind_vcpu(unsigned int port,
         break;
     case ECS_UNBOUND:
     case ECS_INTERDOMAIN:
+        chn->notify_vcpu_id = vcpu_id;
+        break;
     case ECS_PIRQ:
+        if ( chn->notify_vcpu_id == vcpu_id )
+            break;
+        unlink_pirq_port(chn, d->vcpu[chn->notify_vcpu_id]);
         chn->notify_vcpu_id = vcpu_id;
+        pirq_set_affinity(d, chn->u.pirq.irq,
+                          cpumask_of(d->vcpu[vcpu_id]->processor));
+        link_pirq_port(port, chn, d->vcpu[vcpu_id]);
         break;
     default:
         rc = -EINVAL;
@@ -1064,6 +1100,23 @@ void evtchn_destroy_final(struct domain 
 }
 
 
+void evtchn_move_pirqs(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    const cpumask_t *mask = cpumask_of(v->processor);
+    unsigned int port;
+    struct evtchn *chn;
+
+    spin_lock(&d->event_lock);
+    for ( port = v->pirq_evtchn_head; port; port = chn->u.pirq.next_port )
+    {
+        chn = evtchn_from_port(d, port);
+        pirq_set_affinity(d, chn->u.pirq.irq, mask);
+    }
+    spin_unlock(&d->event_lock);
+}
+
+
 static void domain_dump_evtchn_info(struct domain *d)
 {
     unsigned int port;
@@ -1105,7 +1158,7 @@ static void domain_dump_evtchn_info(stru
                    chn->u.interdomain.remote_port);
             break;
         case ECS_PIRQ:
-            printk(" p=%d", chn->u.pirq);
+            printk(" p=%d", chn->u.pirq.irq);
             break;
         case ECS_VIRQ:
             printk(" v=%d", chn->u.virq);
--- 2010-06-15.orig/xen/common/schedule.c       2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/common/schedule.c    2010-06-15 09:21:05.000000000 +0200
@@ -272,6 +272,7 @@ int sched_move_domain(struct domain *d, 
         cpus_setall(v->cpu_affinity);
         v->processor = new_p;
         v->sched_priv = vcpu_priv[v->vcpu_id];
+        evtchn_move_pirqs(v);
 
         new_p = cycle_cpu(new_p, c->cpu_valid);
     }
@@ -419,6 +420,9 @@ static void vcpu_migrate(struct vcpu *v)
     spin_unlock_irqrestore(
         per_cpu(schedule_data, old_cpu).schedule_lock, flags);
 
+    if ( old_cpu != new_cpu )
+        evtchn_move_pirqs(v);
+
     /* Wake on new CPU. */
     vcpu_wake(v);
 }
@@ -1094,6 +1098,9 @@ static void schedule(void)
 
     stop_timer(&prev->periodic_timer);
 
+    if ( next_slice.migrated )
+        evtchn_move_pirqs(next);
+
     /* Ensure that the domain has an up-to-date time base. */
     update_vcpu_system_time(next);
     vcpu_periodic_timer_work(next);
--- 2010-06-15.orig/xen/common/sched_credit.c   2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_credit.c        2010-06-15 09:21:05.000000000 +0200
@@ -1168,7 +1168,7 @@ csched_runq_steal(int peer_cpu, int cpu,
 
 static struct csched_vcpu *
 csched_load_balance(struct csched_private *prv, int cpu,
-    struct csched_vcpu *snext)
+    struct csched_vcpu *snext, bool_t *stolen)
 {
     struct csched_vcpu *speer;
     cpumask_t workers;
@@ -1221,7 +1221,10 @@ csched_load_balance(struct csched_privat
         speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
         spin_unlock(per_cpu(schedule_data, peer_cpu).schedule_lock);
         if ( speer != NULL )
+        {
+            *stolen = 1;
             return speer;
+        }
     }
 
  out:
@@ -1269,6 +1272,7 @@ csched_schedule(
         BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
 
     snext = __runq_elem(runq->next);
+    ret.migrated = 0;
 
     /* Tasklet work (which runs in idle VCPU context) overrides all else. */
     if ( tasklet_work_scheduled )
@@ -1288,7 +1292,7 @@ csched_schedule(
     if ( snext->pri > CSCHED_PRI_TS_OVER )
         __runq_remove(snext);
     else
-        snext = csched_load_balance(prv, cpu, snext);
+        snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
 
     /*
      * Update idlers mask if necessary. When we're idling, other CPUs
--- 2010-06-15.orig/xen/common/sched_credit2.c  2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_credit2.c       2010-06-15 09:22:13.000000000 +0200
@@ -991,10 +991,17 @@ csched_schedule(
     }
 #endif
 
+    ret.migrated = 0;
+
     if ( !is_idle_vcpu(snext->vcpu) )
     {
         snext->start_time = now;
-        snext->vcpu->processor = cpu; /* Safe because lock for old processor is held */
+        /* Safe because lock for old processor is held */
+        if ( snext->vcpu->processor != cpu )
+        {
+            snext->vcpu->processor = cpu;
+            ret.migrated = 1;
+        }
     }
 
     /*
--- 2010-06-15.orig/xen/common/sched_sedf.c     2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_sedf.c  2010-06-15 09:21:05.000000000 +0200
@@ -875,6 +875,8 @@ static struct task_slice sedf_do_schedul
         ret.time = EXTRA_QUANTUM;
     }
 
+    ret.migrated = 0;
+
     EDOM_INFO(ret.task)->sched_start_abs = now;
     CHECK(ret.time > 0);
     ASSERT(sedf_runnable(ret.task));
--- 2010-06-15.orig/xen/include/asm-x86/irq.h   2009-12-16 09:14:13.000000000 +0100
+++ 2010-06-15/xen/include/asm-x86/irq.h        2010-06-15 09:21:05.000000000 +0200
@@ -143,7 +143,7 @@ void move_native_irq(int irq);
 
 void move_masked_irq(int irq);
 
-void irq_set_affinity(int irq, cpumask_t mask);
+void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);
 
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
 #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
--- 2010-06-15.orig/xen/include/xen/cpumask.h   2010-05-17 08:45:28.000000000 +0200
+++ 2010-06-15/xen/include/xen/cpumask.h        2010-06-15 09:21:05.000000000 +0200
@@ -206,7 +206,7 @@ static inline int __cpus_weight(const cp
 }
 
 #define cpus_copy(dest, src) __cpus_copy(&(dest), &(src))
-static inline void __cpus_copy(cpumask_t *dstp, cpumask_t *srcp)
+static inline void __cpus_copy(cpumask_t *dstp, const cpumask_t *srcp)
 {
        bitmap_copy(dstp->bits, srcp->bits, NR_CPUS);
 }
--- 2010-06-15.orig/xen/include/xen/event.h     2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/include/xen/event.h  2010-06-15 09:21:05.000000000 +0200
@@ -47,6 +47,9 @@ long evtchn_bind_vcpu(unsigned int port,
 /* Unmask a local event-channel port. */
 int evtchn_unmask(unsigned int port);
 
+/* Move all PIRQs after a vCPU was moved to another pCPU. */
+void evtchn_move_pirqs(struct vcpu *v);
+
 /* Allocate/free a Xen-attached event channel port. */
 int alloc_unbound_xen_event_channel(
     struct vcpu *local_vcpu, domid_t remote_domid);
--- 2010-06-15.orig/xen/include/xen/irq.h       2009-10-29 12:24:49.000000000 +0100
+++ 2010-06-15/xen/include/xen/irq.h    2010-06-15 09:21:05.000000000 +0200
@@ -138,6 +138,7 @@ extern int pirq_guest_eoi(struct domain 
 extern int pirq_guest_unmask(struct domain *d);
 extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
 extern void pirq_guest_unbind(struct domain *d, int irq);
+extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *);
 extern irq_desc_t *domain_spin_lock_irq_desc(
     struct domain *d, int irq, unsigned long *pflags);
 
--- 2010-06-15.orig/xen/include/xen/sched.h     2010-06-14 08:49:36.000000000 +0200
+++ 2010-06-15/xen/include/xen/sched.h  2010-06-15 09:21:05.000000000 +0200
@@ -61,7 +61,11 @@ struct evtchn
             u16            remote_port;
             struct domain *remote_dom;
         } interdomain; /* state == ECS_INTERDOMAIN */
-        u16 pirq;      /* state == ECS_PIRQ */
+        struct {
+            u16            irq;
+            u16            next_port;
+            u16            prev_port;
+        } pirq;        /* state == ECS_PIRQ */
         u16 virq;      /* state == ECS_VIRQ */
     } u;
 #ifdef FLASK_ENABLE
@@ -142,6 +146,9 @@ struct vcpu 
      */
     int              poll_evtchn;
 
+    /* (over-)protected by ->domain->event_lock */
+    int              pirq_evtchn_head;
+
     unsigned long    pause_flags;
     atomic_t         pause_count;
 
--- 2010-06-15.orig/xen/include/xen/sched-if.h  2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/include/xen/sched-if.h       2010-06-15 09:21:05.000000000 +0200
@@ -79,6 +79,7 @@ static inline void vcpu_schedule_unlock(
 struct task_slice {
     struct vcpu *task;
     s_time_t     time;
+    bool_t       migrated;
 };
 
 struct scheduler {


Attachment: guest-irq-affinity.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel