[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 19/26] xen/riscv: implement IRQ routing for device passthrough





On 6/3/26 6:01 PM, Jan Beulich wrote:
On 08.05.2026 16:43, Oleksii Kurochko wrote:
--- /dev/null
+++ b/xen/arch/riscv/device.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <xen/device_tree.h>
+#include <xen/errno.h>
+#include <xen/iocap.h>
+#include <xen/rangeset.h>
+#include <xen/sched.h>
+
+#include <asm/intc.h>
+
+int map_irq_to_domain(struct domain *d, unsigned int irq,
+                      bool need_mapping, const char *devname)
+{
+    int res;
+
+    res = irq_permit_access(d, irq);
+    if ( res )
+    {
+        printk(XENLOG_ERR "Unable to permit to %pd access to IRQ %u\n", d, 
irq);

Nit: Drop the first "to"?

Sure, I will drop that.


+        return res;
+    }
+
+    if ( need_mapping )
+    {
+        /*
+         * Checking the return of vintc_reserve_virq is not
+         * necessary. It should not fail except when we try to map
+         * the IRQ twice. This can legitimately happen if the IRQ is shared.
+         */
+        vintc_reserve_virq(d, irq);
+
+        res = route_irq_to_guest(d, irq, irq, devname);
+        if ( res < 0 )
+        {
+            printk(XENLOG_ERR "Unable to map IRQ%u to %pd\n", irq, d);
+            return res;
+        }
+    }
+
+    dt_dprintk("  - IRQ: %u\n", irq);
+
+    return 0;
+}
+
+/*
+ * map_device_irqs_to_domain retrieves the interrupts configuration from
+ * a device tree node and maps those interrupts to the target domain.
+ *
+ * Returns:
+ *   < 0 error
+ *   0   success
+ */
+int map_device_irqs_to_domain(struct domain *d,
+                              struct dt_device_node *dev,
+                              bool need_mapping,
+                              struct rangeset *irq_ranges)
+{
+    unsigned int i, nirq;
+    int res, irq;
+    struct dt_raw_irq rirq;

Move the latter three variables to the loop's scope and ...

+    nirq = dt_number_of_irq(dev);

... make this the variable's initializer?

It makes sense. I will do that.


+    /* Give permission and map IRQs */
+    for ( i = 0; i < nirq; i++ )
+    {
+        res = dt_device_get_raw_irq(dev, i, &rirq);
+        if ( res )
+        {
+            printk(XENLOG_ERR "Unable to retrieve irq %u for %s\n",
+                   i, dt_node_full_name(dev));
+            return res;
+        }
+
+        /*
+         * Don't map IRQ that have no physical meaning
+         * ie: IRQ whose controller is not APLIC/IMSIC/PLIC.
+         */
+        if ( rirq.controller != dt_interrupt_controller )
+        {
+            dt_dprintk("irq %u not connected to primary controller."
+                       "Connected to %s\n", i,
+                       dt_node_full_name(rirq.controller));
+            continue;
+        }
+
+        irq = platform_get_irq(dev, i);
+        if ( irq < 0 )
+        {
+            printk("Unable to get irq %u for %s\n", i, dt_node_full_name(dev));
+            return irq;
+        }
+
+        res = map_irq_to_domain(d, irq, need_mapping, dt_node_name(dev));
+        if ( res )
+            return res;
+
+
+        /*
+         * At the moment there is only one user of map_device_irqs_to_domain()
+         * for RISC-V which calls it irq_ranges == NULL.
+         */
+        if ( irq_ranges )
+            return -EOPNOTSUPP;

Why is this checked last, and inside the loop (when it's loop invariant)?

Just to show the place where irq_ranges will be handled. But currently I agree it would be better just move outside the loop.


--- a/xen/arch/riscv/include/asm/intc.h
+++ b/xen/arch/riscv/include/asm/intc.h
@@ -13,8 +13,11 @@ enum intc_version {
  };
struct cpu_user_regs;
+struct domain;

I can spot why this is needed, but ...

+struct dt_device_node;
  struct irq_desc;
  struct kernel_info;
+struct rangeset;
  struct vcpu;

... I'm at a loss to explain the need for these two additions.

Rudements from previous version of this patch series. I will drop them.
The same below ...


--- a/xen/arch/riscv/include/asm/setup.h
+++ b/xen/arch/riscv/include/asm/setup.h
@@ -5,6 +5,10 @@
#include <xen/types.h> +struct domain;
+struct dt_device_node;
+struct rangeset;

Same here - why would they be needed when you make no other changes
to this header?

... here.


--- a/xen/arch/riscv/intc.c
+++ b/xen/arch/riscv/intc.c
@@ -7,7 +7,9 @@
  #include <xen/init.h>
  #include <xen/irq.h>
  #include <xen/lib.h>
+#include <xen/sched.h>
  #include <xen/spinlock.h>
+#include <xen/xvmalloc.h>
#include <asm/aia.h>
  #include <asm/intc.h>
@@ -86,6 +88,22 @@ unsigned int intc_irq_nums(void)
      return intc_hw_ops->irq_nums();
  }
+int intc_route_irq_to_guest(struct irq_desc *desc,
+                            unsigned int priority)
+{
+    ASSERT(spin_is_locked(&desc->lock));
+
+    ASSERT(intc_hw_ops->guest_irq_type);
+
+    desc->handler = intc_hw_ops->guest_irq_type;
+    set_bit(_IRQ_GUEST, &desc->status);

Is desc->status accessed anywhere without holding desc->lock? If not,
__set_bit() or simply |= ?

In release_irq() it could be used without lock:
...
    /* Wait to make sure it's not being used on another CPU */
    do { smp_mb(); } while ( test_bit(_IRQ_INPROGRESS, &desc->status) );


@@ -112,6 +130,14 @@ int domain_vintc_init(struct domain *d)
          break;
      }
+ if ( !ret )
+    {
+        d->arch.vintc->allocated_irqs =
+            xvzalloc_array(unsigned long, 
BITS_TO_LONGS(d->arch.vintc->irq_nums));
+        if ( !d->arch.vintc->allocated_irqs )
+            ret = -ENOMEM;
+    }
+
      return ret;
  }
@@ -129,4 +155,14 @@ void domain_vintc_deinit(struct domain *d)
          printk("vintc (ver:%d) isn't implemented\n", ver);
          break;
      }
+
+    xvfree(d->arch.vintc->allocated_irqs);
+}

XVFREE()

+bool vintc_reserve_virq(const struct domain *d, unsigned int virq)
+{
+    if ( virq >= d->arch.vintc->irq_nums )
+        return false;
+
+    return !test_and_set_bit(virq, d->arch.vintc->allocated_irqs);
  }

As to function / field naming: You don't look to be allocating IRQs. So
is there a reason the field name gives the impression of allocation?
Simply s/allocated/used/ or some such?

I am okay to rename to 'used' instead of 'allocated'.


@@ -221,3 +239,160 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq)
      spin_unlock(&desc->lock);
      irq_exit();
  }
+
+static inline struct irq_guest *irq_get_guest_info(struct irq_desc *desc)
+{
+    ASSERT(spin_is_locked(&desc->lock));
+    ASSERT(test_bit(_IRQ_GUEST, &desc->status));
+    ASSERT(desc->action != NULL);
+
+    return desc->action->dev_id;
+}
+
+static inline struct domain *irq_get_domain(struct irq_desc *desc)
+{
+    return irq_get_guest_info(desc)->d;
+}
+
+void release_irq(unsigned int irq, const void *dev_id)
+{
+    struct irq_desc *desc;
+    unsigned long flags;
+    struct irqaction *action, **action_ptr;
+
+    desc = irq_to_desc(irq);
+
+    spin_lock_irqsave(&desc->lock,flags);
+
+    action_ptr = &desc->action;
+#ifdef CONFIG_IRQ_HAS_MULTIPLE_ACTION
+    for ( ;; )
+    {
+        action = *action_ptr;
+        if ( !action )
+        {
+            printk(XENLOG_WARNING "Trying to free already-free IRQ %u\n", irq);
+            spin_unlock_irqrestore(&desc->lock, flags);
+            return;
+        }
+
+        if ( action->dev_id == dev_id )
+            break;
+
+        action_ptr = &action->next;
+    }
+
+    /* Found it - remove it from the action list */
+    *action_ptr = action->next;
+#else
+    action = *action_ptr;

It is needed to add *action_ptr = NULL here to deal with ...

+#endif
+
+    /* If this was the last action, shut down the IRQ */
+    if ( !desc->action )
+    {
+        desc->handler->shutdown(desc);
+        clear_bit(_IRQ_GUEST, &desc->status);
+    }
+
+    spin_unlock_irqrestore(&desc->lock,flags);
+
+    /* Wait to make sure it's not being used on another CPU */
+    do { smp_mb(); } while ( test_bit(_IRQ_INPROGRESS, &desc->status) );
+
+    if ( action->free_on_release )
+        xvfree(action);

When !IRQ_HAS_MULTIPLE_ACTION desc->action becomes a dangling pointer here.

... it could be xvfree here as action is local variable.


+/* Route an IRQ to a specific guest */
+int route_irq_to_guest(struct domain *d, unsigned int virq,
+                       unsigned int irq, const char *devname)
+{
+    struct irqaction *action;
+    struct irq_guest *info;
+    struct irq_desc *desc;
+    unsigned long flags;
+    int retval = 0;
+
+    desc = irq_to_desc(irq);
+
+    action = xvmalloc(struct irqaction);
+    if ( !action )
+        return -ENOMEM;

This is freed by release_irq(), but ...

+    info = xvmalloc(struct irq_guest);
+    if ( !info )

... where is the (non-error-path) freeing of this?


Agree it should be freed.

I am thing about just to update release_irq():

if ( action->free_on_release )
{
    xvfree(action->dev_id);
    xvfree(action);
}

But I think it is conceptually is incorrect as owner of ->dev_id in this case is guest so it would be better if guest will do that. So I think it would be better to intoduce now release_guest_irq():

int release_guest_irq(struct domain *d, unsigned int virq)
{
    struct irq_desc *desc;
    struct irq_guest *info;
    unsigned long flags;

    desc = irq_to_desc(virq);

    spin_lock_irqsave(&desc->lock, flags);

    if ( !test_bit(_IRQ_GUEST, &desc->status) )
        goto unlock_err;

    info = irq_get_guest_info(desc);
    if ( d != info->d )
        goto unlock_err;

    spin_unlock_irqrestore(&desc->lock, flags);

    release_irq(desc->irq, info);
    xvfree(info);

    return 0;

 unlock_err:
    spin_unlock_irqrestore(&desc->lock, flags);
    return -EINVAL;
}

and then call it in domain_vintc_deinit() for all virqs of a domain.

+    {
+        xvfree(action);
+        return -ENOMEM;
+    }
+
+    info->d = d;
+    info->virq = virq;
+
+    action->dev_id = info;
+    action->name = devname;
+    action->free_on_release = 1;

true

+    spin_lock_irqsave(&desc->lock, flags);
+
+    /*
+     * If the IRQ is already used by someone
+     *  - If it's the same domain -> Xen doesn't need to update the IRQ desc.
+     *  For safety check if we are not trying to assign the IRQ to a
+     *  different vIRQ.
+     *  - Otherwise -> For now, don't allow the IRQ to be shared between
+     *  Xen and domains.
+     */
+    if ( desc->action != NULL )
+    {
+        if ( test_bit(_IRQ_GUEST, &desc->status) )
+        {
+            struct domain *ad = irq_get_domain(desc);
+
+            if ( d != ad )
+            {
+                printk(XENLOG_G_ERR "IRQ %u is already used by domain %u\n",
+                       irq, ad->domain_id);
+                retval = -EBUSY;
+            }
+            else if ( irq_get_guest_info(desc)->virq != virq )
+            {
+                printk(XENLOG_G_ERR
+                       "d%u: IRQ %u is already assigned to vIRQ %u\n",
+                       d->domain_id, irq, irq_get_guest_info(desc)->virq);

Please can you get used to using %pd?

I'll do my best. Thanks for consistently pointing that out. I appreciate it.



+                retval = -EBUSY;
+            }
+        }
+        else
+        {
+            printk(XENLOG_G_ERR "IRQ %u is already used by Xen\n", irq);
+            retval = -EBUSY;
+        }
+        goto out;
+    }
+
+    retval = _setup_irq(desc, 0, action);
+    if ( retval )
+        goto out;
+
+    retval = intc_route_irq_to_guest(desc, IRQ_NO_PRIORITY);
+
+    spin_unlock_irqrestore(&desc->lock, flags);
+
+    if ( retval )
+    {
+        release_irq(desc->irq, info);

Is de-referencing desc legitimate / race free with desc->lock not held?

desc itself cannot be freed, it's a pointer into the statically allocated array irq_desc[NR_IRQS] (riscv/irq.c:29). The descriptor object lives for the lifetime of the system.

desc->irq is write-once, it's set during init_irq_data() at boot (riscv/irq.c:172) and never modified again. It's effectively an immutable field after initialization, so reading it without the lock held is safe.

Thanks.

~ Oleksii




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.