Re: [Xen-devel] irq_guest_eoi_timer interaction with MSI

To: "Keir Fraser" <keir.fraser@xxxxxxxxxxxxx>
Subject: Re: [Xen-devel] irq_guest_eoi_timer interaction with MSI
From: "Jan Beulich" <jbeulich@xxxxxxxxxx>
Date: Mon, 24 Nov 2008 16:34:46 +0000
Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Mon, 24 Nov 2008 08:35:09 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <C542F881.1F388%keir.fraser@xxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <491D52E8.76E4.0078.0@xxxxxxxxxx> <C542F881.1F388%keir.fraser@xxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
>>> Keir Fraser <keir.fraser@xxxxxxxxxxxxx> 14.11.08 10:42 >>>
>On 14/11/08 09:28, "Jan Beulich" <jbeulich@xxxxxxxxxx> wrote:
>Perhaps if we go your route we can make PHYSDEVOP_irq_eoi obsolete? It's
>only really called where we also do an unmask, and it's pointless to have
>two hypercalls where one will do. So a guest that detects the new bitmap
>could then know it only needs to unmask-by-hypercall, rather than use
>PHYSDEVOP_irq_eoi at all.

Perhaps the other way around: make PHYSDEVOP_eoi imply an unmask, since an
unmask always accompanies it anyway (whereas not every unmask also wants an
EOI to be signaled). Below is a draft (compile-tested only) patch on which
I'd appreciate comments before coding the guest side - in particular whether
the approach seems reasonable at all, whether it meets your naming and coding
preferences (I'm pretty sure it won't), and of course whether it's obviously
broken in some respect.
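
For reference, guest-side usage could look roughly like this (a hypothetical
sketch only, not part of the patch below; virt_to_mfn(), unmask_evtchn(),
evtchn_from_irq() and the function names here are placeholders for whatever
the guest kernel actually provides):

/* One page used as a bit array indexed by Xen's PIRQ value. */
static unsigned long pirq_eoi_map[PAGE_SIZE / sizeof(unsigned long)]
    __attribute__((__aligned__(PAGE_SIZE)));

static int register_pirq_eoi_map(void)
{
    struct physdev_pirq_eoi_mfn info = {
        .mfn = virt_to_mfn(pirq_eoi_map)      /* assumed helper */
    };

    /* One-time registration; afterwards PHYSDEVOP_eoi also unmasks. */
    return HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_mfn, &info);
}

static void end_pirq(unsigned int irq)
{
    if ( test_bit(irq, pirq_eoi_map) )
    {
        /* Xen flagged this PIRQ: an EOI (now implying an unmask) is wanted. */
        struct physdev_eoi eoi = { .irq = irq };
        HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
    }
    else
    {
        /* No EOI wanted; unmasking the event channel suffices. */
        unmask_evtchn(evtchn_from_irq(irq));  /* assumed helpers */
    }
}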

Thanks, Jan

Index: 2008-11-20/xen/arch/x86/domain.c
===================================================================
--- 2008-11-20.orig/xen/arch/x86/domain.c       2008-11-20 08:56:58.000000000 +0100
+++ 2008-11-20/xen/arch/x86/domain.c    2008-11-24 15:03:21.000000000 +0100
@@ -1823,6 +1823,12 @@ int domain_relinquish_resources(struct d
             unmap_vcpu_info(v);
         }
 
+        if ( d->arch.pirq_eoi_mfn )
+        {
+            put_page_and_type(mfn_to_page(d->arch.pirq_eoi_mfn));
+            d->arch.pirq_eoi_mfn = 0;
+        }
+
         d->arch.relmem = RELMEM_xen;
         /* fallthrough */
 
Index: 2008-11-20/xen/arch/x86/irq.c
===================================================================
--- 2008-11-20.orig/xen/arch/x86/irq.c  2008-11-20 10:13:47.000000000 +0100
+++ 2008-11-20/xen/arch/x86/irq.c       2008-11-24 17:25:28.000000000 +0100
@@ -18,6 +18,7 @@
 #include <xen/iommu.h>
 #include <asm/msi.h>
 #include <asm/current.h>
+#include <asm/flushtlb.h>
 #include <public/physdev.h>
 
 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
@@ -206,16 +207,63 @@ struct pending_eoi {
 static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_VECTORS]);
 #define pending_eoi_sp(p) ((p)[NR_VECTORS-1].vector)
 
+#ifdef __i386__
+/* Cannot use map_domain_page(). */
+static inline unsigned long *map_pirq_eoi(struct domain *d)
+{
+    void *v = (void *)fix_to_virt(FIX_PIRQ_EOI);
+
+    __set_fixmap(FIX_PIRQ_EOI, d->arch.pirq_eoi_mfn, __PAGE_HYPERVISOR);
+    flush_tlb_one_local(v);
+    return v;
+}
+
+static inline void flush_pirq_eoi(void)
+{
+    __set_fixmap(FIX_PIRQ_EOI, 0, 0);
+    flush_tlb_one_local(fix_to_virt(FIX_PIRQ_EOI));
+}
+#else
+# define map_pirq_eoi(d) ((unsigned long *)mfn_to_virt((d)->arch.pirq_eoi_mfn))
+# define flush_pirq_eoi()
+#endif
+
+static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
+{
+    if ( d->arch.pirq_eoi_mfn )
+        set_bit(irq, map_pirq_eoi(d));
+}
+
+static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
+{
+    if ( d->arch.pirq_eoi_mfn )
+        clear_bit(irq, map_pirq_eoi(d));
+}
+
+static void _irq_guest_eoi(irq_desc_t *desc)
+{
+    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+    unsigned int i, vector = desc - irq_desc;
+
+    for ( i = 0; i < action->nr_guests; ++i )
+        clear_pirq_eoi(action->guest[i],
+                       domain_vector_to_irq(action->guest[i], vector));
+    flush_pirq_eoi();
+
+    desc->status &= ~IRQ_INPROGRESS;
+    desc->handler->enable(vector);
+}
+
 static struct timer irq_guest_eoi_timer[NR_VECTORS];
+static DECLARE_BITMAP(irq_guest_eoi_pending, NR_VECTORS);
 static void irq_guest_eoi_timer_fn(void *data)
 {
     irq_desc_t *desc = data;
-    unsigned vector = desc - irq_desc;
     unsigned long flags;
 
     spin_lock_irqsave(&desc->lock, flags);
-    desc->status &= ~IRQ_INPROGRESS;
-    desc->handler->enable(vector);
+    if ( test_and_clear_bit(desc - irq_desc, irq_guest_eoi_pending) )
+        _irq_guest_eoi(desc);
     spin_unlock_irqrestore(&desc->lock, flags);
 }
 
@@ -272,8 +320,15 @@ static void __do_IRQ_guest(int vector)
 
     if ( already_pending == action->nr_guests )
     {
-        desc->handler->disable(vector);
         stop_timer(&irq_guest_eoi_timer[vector]);
+        desc->handler->disable(vector);
+        set_bit(vector, irq_guest_eoi_pending);
+        for ( i = 0; i < already_pending; ++i )
+        {
+            d = action->guest[i];
+            set_pirq_eoi(d, domain_vector_to_irq(d, vector));
+        }
+        flush_pirq_eoi();
         init_timer(&irq_guest_eoi_timer[vector],
                    irq_guest_eoi_timer_fn, desc, smp_processor_id());
         set_timer(&irq_guest_eoi_timer[vector], NOW() + MILLISECS(1));
@@ -382,8 +437,15 @@ static void __pirq_guest_eoi(struct doma
     action = (irq_guest_action_t *)desc->action;
     vector = desc - irq_desc;
 
-    ASSERT(!test_bit(irq, d->pirq_mask) ||
-           (action->ack_type != ACKTYPE_NONE));
+    if ( action->ack_type == ACKTYPE_NONE )
+    {
+        ASSERT(!test_bit(irq, d->pirq_mask));
+        if ( test_and_clear_bit(vector, irq_guest_eoi_pending) )
+        {
+            kill_timer(&irq_guest_eoi_timer[vector]);
+            _irq_guest_eoi(desc);
+        }
+    }
 
     if ( unlikely(!test_and_clear_bit(irq, d->pirq_mask)) ||
          unlikely(--action->in_flight != 0) )
@@ -607,6 +669,12 @@ int pirq_guest_bind(struct vcpu *v, int 
 
     action->guest[action->nr_guests++] = v->domain;
 
+    if ( action->ack_type != ACKTYPE_NONE )
+        set_pirq_eoi(v->domain, irq);
+    else
+        clear_pirq_eoi(v->domain, irq);
+    flush_pirq_eoi();
+
  unlock_out:
     spin_unlock_irq(&desc->lock);
  out:
Index: 2008-11-20/xen/arch/x86/physdev.c
===================================================================
--- 2008-11-20.orig/xen/arch/x86/physdev.c      2008-10-13 13:36:27.000000000 +0200
+++ 2008-11-20/xen/arch/x86/physdev.c   2008-11-24 17:06:48.000000000 +0100
@@ -191,10 +191,35 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         ret = -EFAULT;
         if ( copy_from_guest(&eoi, arg, 1) != 0 )
             break;
+        ret = -EINVAL;
+        if ( eoi.irq < 0 || eoi.irq >= NR_IRQS )
+            break;
+        if ( v->domain->arch.pirq_eoi_mfn )
+            evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
         ret = pirq_guest_eoi(v->domain, eoi.irq);
         break;
     }
 
+    case PHYSDEVOP_pirq_eoi_mfn: {
+        struct physdev_pirq_eoi_mfn info;
+
+        BUILD_BUG_ON(NR_IRQS > PAGE_SIZE * 8);
+        ret = -EFAULT;
+        if ( copy_from_guest(&info, arg, 1) != 0 )
+            break;
+        ret = -EBUSY;
+        if ( v->domain->arch.pirq_eoi_mfn )
+            break;
+        ret = -EINVAL;
+        if ( !mfn_valid(info.mfn) ||
+             !get_page_and_type(mfn_to_page(info.mfn), v->domain,
+                                PGT_writable_page) )
+            break;
+        v->domain->arch.pirq_eoi_mfn = info.mfn;
+        ret = 0;
+        break;
+    }
+
     /* Legacy since 0x00030202. */
     case PHYSDEVOP_IRQ_UNMASK_NOTIFY: {
         ret = pirq_guest_unmask(v->domain);
Index: 2008-11-20/xen/common/event_channel.c
===================================================================
--- 2008-11-20.orig/xen/common/event_channel.c  2008-10-24 11:21:38.000000000 +0200
+++ 2008-11-20/xen/common/event_channel.c       2008-11-24 17:14:44.000000000 +0100
@@ -762,10 +762,9 @@ long evtchn_bind_vcpu(unsigned int port,
 }
 
 
-static long evtchn_unmask(evtchn_unmask_t *unmask)
+int evtchn_unmask(unsigned int port)
 {
     struct domain *d = current->domain;
-    int            port = unmask->port;
     struct vcpu   *v;
 
     spin_lock(&d->event_lock);
@@ -916,7 +915,7 @@ long do_event_channel_op(int cmd, XEN_GU
         struct evtchn_unmask unmask;
         if ( copy_from_guest(&unmask, arg, 1) != 0 )
             return -EFAULT;
-        rc = evtchn_unmask(&unmask);
+        rc = evtchn_unmask(unmask.port);
         break;
     }
 
Index: 2008-11-20/xen/include/asm-x86/domain.h
===================================================================
--- 2008-11-20.orig/xen/include/asm-x86/domain.h        2008-11-20 08:48:26.000000000 +0100
+++ 2008-11-20/xen/include/asm-x86/domain.h     2008-11-24 14:52:49.000000000 +0100
@@ -238,6 +238,8 @@ struct arch_domain
     int vector_pirq[NR_VECTORS];
     s16 pirq_vector[NR_IRQS];
 
+    unsigned long pirq_eoi_mfn;
+
     /* Pseudophysical e820 map (XENMEM_memory_map).  */
     struct e820entry e820[3];
     unsigned int nr_e820;
Index: 2008-11-20/xen/include/asm-x86/fixmap.h
===================================================================
--- 2008-11-20.orig/xen/include/asm-x86/fixmap.h        2008-11-05 16:54:22.000000000 +0100
+++ 2008-11-20/xen/include/asm-x86/fixmap.h     2008-11-24 16:43:03.000000000 +0100
@@ -33,6 +33,7 @@ enum fixed_addresses {
 #ifdef __i386__
     FIX_PAE_HIGHMEM_0,
     FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
+    FIX_PIRQ_EOI,
 #endif
     FIX_APIC_BASE,
     FIX_IO_APIC_BASE_0,
Index: 2008-11-20/xen/include/public/physdev.h
===================================================================
--- 2008-11-20.orig/xen/include/public/physdev.h        2008-08-15 16:18:55.000000000 +0200
+++ 2008-11-20/xen/include/public/physdev.h     2008-11-24 15:16:10.000000000 +0100
@@ -41,6 +41,21 @@ typedef struct physdev_eoi physdev_eoi_t
 DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
 
 /*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
+ * once the guest used this function in that the associated event channel
+ * will automatically get unmasked. The page registered is used as a bit
+ * array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_mfn          17
+struct physdev_pirq_eoi_mfn {
+    /* IN */
+    xen_pfn_t mfn;
+};
+typedef struct physdev_pirq_eoi_mfn physdev_pirq_eoi_mfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_mfn_t);
+
+/*
  * Query the status of an IRQ line.
  * @arg == pointer to physdev_irq_status_query structure.
  */
Index: 2008-11-20/xen/include/xen/event.h
===================================================================
--- 2008-11-20.orig/xen/include/xen/event.h     2008-09-19 14:28:28.000000000 +0200
+++ 2008-11-20/xen/include/xen/event.h  2008-11-24 17:12:01.000000000 +0100
@@ -44,6 +44,9 @@ int evtchn_send(struct domain *d, unsign
 /* Bind a local event-channel port to the specified VCPU. */
 long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id);
 
+/* unmask the current domain's event-channel port. */
+int evtchn_unmask(unsigned int port);
+
 /* Allocate/free a Xen-attached event channel port. */
 int alloc_unbound_xen_event_channel(
     struct vcpu *local_vcpu, domid_t remote_domid);


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
