[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2 19/26] xen/riscv: implement IRQ routing for device passthrough
- To: Jan Beulich <jbeulich@xxxxxxxx>
- From: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx>
- Date: Thu, 4 Jun 2026 17:35:27 +0200
- Authentication-results: eu.smtp.expurgate.cloud; dkim=pass header.s=20251104 header.d=gmail.com header.i="@gmail.com" header.h="Content-Transfer-Encoding:In-Reply-To:From:Content-Language:References:Cc:To:Subject:User-Agent:MIME-Version:Date:Message-ID"
- Cc: Romain Caritey <Romain.Caritey@xxxxxxxxxxxxx>, Alistair Francis <alistair.francis@xxxxxxx>, Connor Davis <connojdavis@xxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Anthony PERARD <anthony.perard@xxxxxxxxxx>, Michal Orzel <michal.orzel@xxxxxxx>, Julien Grall <julien@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx
- Delivery-date: Thu, 04 Jun 2026 15:35:32 +0000
- List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
On 6/3/26 6:01 PM, Jan Beulich wrote:
On 08.05.2026 16:43, Oleksii Kurochko wrote:
--- /dev/null
+++ b/xen/arch/riscv/device.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <xen/device_tree.h>
+#include <xen/errno.h>
+#include <xen/iocap.h>
+#include <xen/rangeset.h>
+#include <xen/sched.h>
+
+#include <asm/intc.h>
+
+int map_irq_to_domain(struct domain *d, unsigned int irq,
+ bool need_mapping, const char *devname)
+{
+ int res;
+
+ res = irq_permit_access(d, irq);
+ if ( res )
+ {
+ printk(XENLOG_ERR "Unable to permit to %pd access to IRQ %u\n", d,
irq);
Nit: Drop the first "to"?
Sure, I will drop that.
+ return res;
+ }
+
+ if ( need_mapping )
+ {
+ /*
+ * Checking the return of vintc_reserve_virq is not
+ * necessary. It should not fail except when we try to map
+ * the IRQ twice. This can legitimately happen if the IRQ is shared.
+ */
+ vintc_reserve_virq(d, irq);
+
+ res = route_irq_to_guest(d, irq, irq, devname);
+ if ( res < 0 )
+ {
+ printk(XENLOG_ERR "Unable to map IRQ%u to %pd\n", irq, d);
+ return res;
+ }
+ }
+
+ dt_dprintk(" - IRQ: %u\n", irq);
+
+ return 0;
+}
+
+/*
+ * map_device_irqs_to_domain retrieves the interrupts configuration from
+ * a device tree node and maps those interrupts to the target domain.
+ *
+ * Returns:
+ * < 0 error
+ * 0 success
+ */
+int map_device_irqs_to_domain(struct domain *d,
+ struct dt_device_node *dev,
+ bool need_mapping,
+ struct rangeset *irq_ranges)
+{
+ unsigned int i, nirq;
+ int res, irq;
+ struct dt_raw_irq rirq;
Move the latter three variables to the loop's scope and ...
+ nirq = dt_number_of_irq(dev);
... make this the variable's initializer?
It makes sense. I will do that.
+ /* Give permission and map IRQs */
+ for ( i = 0; i < nirq; i++ )
+ {
+ res = dt_device_get_raw_irq(dev, i, &rirq);
+ if ( res )
+ {
+ printk(XENLOG_ERR "Unable to retrieve irq %u for %s\n",
+ i, dt_node_full_name(dev));
+ return res;
+ }
+
+ /*
+ * Don't map IRQ that have no physical meaning
+ * ie: IRQ whose controller is not APLIC/IMSIC/PLIC.
+ */
+ if ( rirq.controller != dt_interrupt_controller )
+ {
+ dt_dprintk("irq %u not connected to primary controller."
+ "Connected to %s\n", i,
+ dt_node_full_name(rirq.controller));
+ continue;
+ }
+
+ irq = platform_get_irq(dev, i);
+ if ( irq < 0 )
+ {
+ printk("Unable to get irq %u for %s\n", i, dt_node_full_name(dev));
+ return irq;
+ }
+
+ res = map_irq_to_domain(d, irq, need_mapping, dt_node_name(dev));
+ if ( res )
+ return res;
+
+
+ /*
+ * At the moment there is only one user of map_device_irqs_to_domain()
+ * for RISC-V which calls it irq_ranges == NULL.
+ */
+ if ( irq_ranges )
+ return -EOPNOTSUPP;
Why is this checked last, and inside the loop (when it's loop invariant)?
Just to show the place where irq_ranges will be handled. But currently I
agree it would be better just move outside the loop.
--- a/xen/arch/riscv/include/asm/intc.h
+++ b/xen/arch/riscv/include/asm/intc.h
@@ -13,8 +13,11 @@ enum intc_version {
};
struct cpu_user_regs;
+struct domain;
I can spot why this is needed, but ...
+struct dt_device_node;
struct irq_desc;
struct kernel_info;
+struct rangeset;
struct vcpu;
... I'm at a loss to explain the need for these two additions.
Rudements from previous version of this patch series. I will drop them.
The same below ...
--- a/xen/arch/riscv/include/asm/setup.h
+++ b/xen/arch/riscv/include/asm/setup.h
@@ -5,6 +5,10 @@
#include <xen/types.h>
+struct domain;
+struct dt_device_node;
+struct rangeset;
Same here - why would they be needed when you make no other changes
to this header?
... here.
--- a/xen/arch/riscv/intc.c
+++ b/xen/arch/riscv/intc.c
@@ -7,7 +7,9 @@
#include <xen/init.h>
#include <xen/irq.h>
#include <xen/lib.h>
+#include <xen/sched.h>
#include <xen/spinlock.h>
+#include <xen/xvmalloc.h>
#include <asm/aia.h>
#include <asm/intc.h>
@@ -86,6 +88,22 @@ unsigned int intc_irq_nums(void)
return intc_hw_ops->irq_nums();
}
+int intc_route_irq_to_guest(struct irq_desc *desc,
+ unsigned int priority)
+{
+ ASSERT(spin_is_locked(&desc->lock));
+
+ ASSERT(intc_hw_ops->guest_irq_type);
+
+ desc->handler = intc_hw_ops->guest_irq_type;
+ set_bit(_IRQ_GUEST, &desc->status);
Is desc->status accessed anywhere without holding desc->lock? If not,
__set_bit() or simply |= ?
In release_irq() it could be used without lock:
...
/* Wait to make sure it's not being used on another CPU */
do { smp_mb(); } while ( test_bit(_IRQ_INPROGRESS, &desc->status) );
@@ -112,6 +130,14 @@ int domain_vintc_init(struct domain *d)
break;
}
+ if ( !ret )
+ {
+ d->arch.vintc->allocated_irqs =
+ xvzalloc_array(unsigned long,
BITS_TO_LONGS(d->arch.vintc->irq_nums));
+ if ( !d->arch.vintc->allocated_irqs )
+ ret = -ENOMEM;
+ }
+
return ret;
}
@@ -129,4 +155,14 @@ void domain_vintc_deinit(struct domain *d)
printk("vintc (ver:%d) isn't implemented\n", ver);
break;
}
+
+ xvfree(d->arch.vintc->allocated_irqs);
+}
XVFREE()
+bool vintc_reserve_virq(const struct domain *d, unsigned int virq)
+{
+ if ( virq >= d->arch.vintc->irq_nums )
+ return false;
+
+ return !test_and_set_bit(virq, d->arch.vintc->allocated_irqs);
}
As to function / field naming: You don't look to be allocating IRQs. So
is there a reason the field name gives the impression of allocation?
Simply s/allocated/used/ or some such?
I am okay to rename to 'used' instead of 'allocated'.
@@ -221,3 +239,160 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq)
spin_unlock(&desc->lock);
irq_exit();
}
+
+static inline struct irq_guest *irq_get_guest_info(struct irq_desc *desc)
+{
+ ASSERT(spin_is_locked(&desc->lock));
+ ASSERT(test_bit(_IRQ_GUEST, &desc->status));
+ ASSERT(desc->action != NULL);
+
+ return desc->action->dev_id;
+}
+
+static inline struct domain *irq_get_domain(struct irq_desc *desc)
+{
+ return irq_get_guest_info(desc)->d;
+}
+
+void release_irq(unsigned int irq, const void *dev_id)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+ struct irqaction *action, **action_ptr;
+
+ desc = irq_to_desc(irq);
+
+ spin_lock_irqsave(&desc->lock,flags);
+
+ action_ptr = &desc->action;
+#ifdef CONFIG_IRQ_HAS_MULTIPLE_ACTION
+ for ( ;; )
+ {
+ action = *action_ptr;
+ if ( !action )
+ {
+ printk(XENLOG_WARNING "Trying to free already-free IRQ %u\n", irq);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return;
+ }
+
+ if ( action->dev_id == dev_id )
+ break;
+
+ action_ptr = &action->next;
+ }
+
+ /* Found it - remove it from the action list */
+ *action_ptr = action->next;
+#else
+ action = *action_ptr;
It is needed to add *action_ptr = NULL here to deal with ...
+#endif
+
+ /* If this was the last action, shut down the IRQ */
+ if ( !desc->action )
+ {
+ desc->handler->shutdown(desc);
+ clear_bit(_IRQ_GUEST, &desc->status);
+ }
+
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+ /* Wait to make sure it's not being used on another CPU */
+ do { smp_mb(); } while ( test_bit(_IRQ_INPROGRESS, &desc->status) );
+
+ if ( action->free_on_release )
+ xvfree(action);
When !IRQ_HAS_MULTIPLE_ACTION desc->action becomes a dangling pointer here.
... it could be xvfree here as action is local variable.
+/* Route an IRQ to a specific guest */
+int route_irq_to_guest(struct domain *d, unsigned int virq,
+ unsigned int irq, const char *devname)
+{
+ struct irqaction *action;
+ struct irq_guest *info;
+ struct irq_desc *desc;
+ unsigned long flags;
+ int retval = 0;
+
+ desc = irq_to_desc(irq);
+
+ action = xvmalloc(struct irqaction);
+ if ( !action )
+ return -ENOMEM;
This is freed by release_irq(), but ...
+ info = xvmalloc(struct irq_guest);
+ if ( !info )
... where is the (non-error-path) freeing of this?
Agree it should be freed.
I am thing about just to update release_irq():
if ( action->free_on_release )
{
xvfree(action->dev_id);
xvfree(action);
}
But I think it is conceptually is incorrect as owner of ->dev_id in this
case is guest so it would be better if guest will do that. So I think it
would be better to intoduce now release_guest_irq():
int release_guest_irq(struct domain *d, unsigned int virq)
{
struct irq_desc *desc;
struct irq_guest *info;
unsigned long flags;
desc = irq_to_desc(virq);
spin_lock_irqsave(&desc->lock, flags);
if ( !test_bit(_IRQ_GUEST, &desc->status) )
goto unlock_err;
info = irq_get_guest_info(desc);
if ( d != info->d )
goto unlock_err;
spin_unlock_irqrestore(&desc->lock, flags);
release_irq(desc->irq, info);
xvfree(info);
return 0;
unlock_err:
spin_unlock_irqrestore(&desc->lock, flags);
return -EINVAL;
}
and then call it in domain_vintc_deinit() for all virqs of a domain.
+ {
+ xvfree(action);
+ return -ENOMEM;
+ }
+
+ info->d = d;
+ info->virq = virq;
+
+ action->dev_id = info;
+ action->name = devname;
+ action->free_on_release = 1;
true
+ spin_lock_irqsave(&desc->lock, flags);
+
+ /*
+ * If the IRQ is already used by someone
+ * - If it's the same domain -> Xen doesn't need to update the IRQ desc.
+ * For safety check if we are not trying to assign the IRQ to a
+ * different vIRQ.
+ * - Otherwise -> For now, don't allow the IRQ to be shared between
+ * Xen and domains.
+ */
+ if ( desc->action != NULL )
+ {
+ if ( test_bit(_IRQ_GUEST, &desc->status) )
+ {
+ struct domain *ad = irq_get_domain(desc);
+
+ if ( d != ad )
+ {
+ printk(XENLOG_G_ERR "IRQ %u is already used by domain %u\n",
+ irq, ad->domain_id);
+ retval = -EBUSY;
+ }
+ else if ( irq_get_guest_info(desc)->virq != virq )
+ {
+ printk(XENLOG_G_ERR
+ "d%u: IRQ %u is already assigned to vIRQ %u\n",
+ d->domain_id, irq, irq_get_guest_info(desc)->virq);
Please can you get used to using %pd?
I'll do my best. Thanks for consistently pointing that out. I appreciate it.
+ retval = -EBUSY;
+ }
+ }
+ else
+ {
+ printk(XENLOG_G_ERR "IRQ %u is already used by Xen\n", irq);
+ retval = -EBUSY;
+ }
+ goto out;
+ }
+
+ retval = _setup_irq(desc, 0, action);
+ if ( retval )
+ goto out;
+
+ retval = intc_route_irq_to_guest(desc, IRQ_NO_PRIORITY);
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( retval )
+ {
+ release_irq(desc->irq, info);
Is de-referencing desc legitimate / race free with desc->lock not held?
desc itself cannot be freed, it's a pointer into the statically
allocated array irq_desc[NR_IRQS] (riscv/irq.c:29). The descriptor
object lives for the lifetime of the system.
desc->irq is write-once, it's set during init_irq_data() at boot
(riscv/irq.c:172) and never modified again. It's effectively an
immutable field after initialization, so reading it without the lock
held is safe.
Thanks.
~ Oleksii
|