With this it is questionable whether retaining struct domain's nr_pirqs is actually necessary - the value now only serves for bounds checking, and this boundary could easily be nr_irqs. Another thing to consider is whether it's worth storing the pirq number in struct pirq, to avoid passing the number and a pointer to quite a number of functions. Note that ia64, the build of which is broken currently anyway, is only partially fixed up. v2: adjustments for split setup/teardown of translation data Signed-off-by: Jan Beulich --- 2011-04-29.orig/xen/arch/ia64/vmx/vmx_interrupt.c +++ 2011-04-29/xen/arch/ia64/vmx/vmx_interrupt.c @@ -155,13 +155,13 @@ void hvm_isa_irq_deassert(struct domain /* dummy */ } -int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable) +int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable) { /* dummy */ return -ENOSYS; } -void msixtbl_pt_unregister(struct domain *d, int pirq) +void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq) { /* dummy */ } --- 2011-04-29.orig/xen/arch/ia64/xen/hypercall.c +++ 2011-04-29/xen/arch/ia64/xen/hypercall.c @@ -65,8 +65,11 @@ static long __do_pirq_guest_eoi(struct d { if ( pirq < 0 || pirq >= NR_IRQS ) return -EINVAL; - if ( d->arch.pirq_eoi_map ) - evtchn_unmask(d->pirq_to_evtchn[pirq]); + if ( d->arch.pirq_eoi_map ) { + spin_lock(&d->event_lock); + evtchn_unmask(pirq_to_evtchn(d, pirq)); + spin_unlock(&d->event_lock); + } return pirq_guest_eoi(d, pirq); } --- 2011-04-29.orig/xen/arch/ia64/xen/irq.c +++ 2011-04-29/xen/arch/ia64/xen/irq.c @@ -363,15 +363,17 @@ void __do_IRQ_guest(int irq) irq_desc_t *desc = &irq_desc[irq]; irq_guest_action_t *action = (irq_guest_action_t *)desc->action; struct domain *d; + struct pirq *pirq; int i, already_pending = 0; for ( i = 0; i < action->nr_guests; i++ ) { d = action->guest[i]; + pirq = pirq_info(d, irq); if ( (action->ack_type != ACKTYPE_NONE) && - !test_and_set_bit(irq, &d->pirq_mask) ) + !test_and_set_bool(pirq->masked) ) 
action->in_flight++; - if ( hvm_do_IRQ_dpci(d, irq) ) + if ( hvm_do_IRQ_dpci(d, pirq) ) { if ( action->ack_type == ACKTYPE_NONE ) { @@ -379,7 +381,7 @@ void __do_IRQ_guest(int irq) desc->status |= IRQ_INPROGRESS; /* cleared during hvm eoi */ } } - else if ( send_guest_pirq(d, irq) && + else if ( send_guest_pirq(d, pirq) && (action->ack_type == ACKTYPE_NONE) ) { already_pending++; @@ -423,26 +425,23 @@ static int pirq_acktype(int irq) return ACKTYPE_NONE; } -int pirq_guest_eoi(struct domain *d, int irq) +int pirq_guest_eoi(struct domain *d, struct pirq *pirq) { irq_desc_t *desc; irq_guest_action_t *action; - if ( (irq < 0) || (irq >= NR_IRQS) ) - return -EINVAL; - desc = &irq_desc[irq]; spin_lock_irq(&desc->lock); action = (irq_guest_action_t *)desc->action; if ( action->ack_type == ACKTYPE_NONE ) { - ASSERT(!test_bit(irq, d->pirq_mask)); + ASSERT(!pirq->masked); stop_timer(&irq_guest_eoi_timer[irq]); _irq_guest_eoi(desc); } - if ( test_and_clear_bit(irq, &d->pirq_mask) && (--action->in_flight == 0) ) + if ( test_and_clear_bool(pirq->masked) && (--action->in_flight == 0) ) { ASSERT(action->ack_type == ACKTYPE_UNMASK); desc->handler->end(irq); @@ -455,22 +454,27 @@ int pirq_guest_eoi(struct domain *d, int int pirq_guest_unmask(struct domain *d) { - int irq; + unsigned int pirq = 0, n, i; + unsigned long indexes[16]; + struct pirq *pirqs[ARRAY_SIZE(indexes)]; shared_info_t *s = d->shared_info; - for ( irq = find_first_bit(d->pirq_mask, NR_IRQS); - irq < NR_IRQS; - irq = find_next_bit(d->pirq_mask, NR_IRQS, irq+1) ) - { - if ( !test_bit(d->pirq_to_evtchn[irq], &s->evtchn_mask[0]) ) - pirq_guest_eoi(d, irq); - - } + do { + n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq, + ARRAY_SIZE(pirqs), indexes); + for ( i = 0; i < n; ++i ) + { + pirq = indexes[i]; + if ( pirqs[i]->masked && + !test_bit(pirqs[i]->evtchn, &s->evtchn_mask[0]) ) + pirq_guest_eoi(d, pirqs[i]); + } + } while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) ); return 0; } -int 
pirq_guest_bind(struct vcpu *v, int irq, int will_share) +int pirq_guest_bind(struct vcpu *v, int irq, struct pirq *pirq, int will_share) { irq_desc_t *desc = &irq_desc[irq]; irq_guest_action_t *action; @@ -554,7 +558,7 @@ int pirq_guest_bind(struct vcpu *v, int return rc; } -void pirq_guest_unbind(struct domain *d, int irq) +void pirq_guest_unbind(struct domain *d, int irq, struct pirq *pirq) { irq_desc_t *desc = &irq_desc[irq]; irq_guest_action_t *action; @@ -572,7 +576,7 @@ void pirq_guest_unbind(struct domain *d, action->nr_guests--; if ( action->ack_type == ACKTYPE_UNMASK ) - if ( test_and_clear_bit(irq, &d->pirq_mask) && + if ( test_and_clear_bool(pirq->masked) && (--action->in_flight == 0) ) desc->handler->end(irq); --- 2011-04-29.orig/xen/arch/x86/domain.c +++ 2011-04-29/xen/arch/x86/domain.c @@ -608,25 +608,9 @@ int arch_domain_create(struct domain *d, share_xen_page_with_guest( virt_to_page(d->shared_info), d, XENSHARE_writable); - d->arch.pirq_irq = xmalloc_array(int, d->nr_pirqs); - if ( !d->arch.pirq_irq ) - goto fail; - memset(d->arch.pirq_irq, 0, - d->nr_pirqs * sizeof(*d->arch.pirq_irq)); - if ( (rc = init_domain_irq_mapping(d)) != 0 ) goto fail; - if ( is_hvm_domain(d) ) - { - d->arch.pirq_emuirq = xmalloc_array(int, d->nr_pirqs); - if ( !d->arch.pirq_emuirq ) - goto fail; - for (i = 0; i < d->nr_pirqs; i++) - d->arch.pirq_emuirq[i] = IRQ_UNBOUND; - } - - if ( (rc = iommu_domain_init(d)) != 0 ) goto fail; @@ -660,8 +644,6 @@ int arch_domain_create(struct domain *d, fail: d->is_dying = DOMDYING_dead; vmce_destroy_msr(d); - xfree(d->arch.pirq_irq); - xfree(d->arch.pirq_emuirq); cleanup_domain_irq_mapping(d); free_xenheap_page(d->shared_info); if ( paging_initialised ) @@ -714,8 +696,6 @@ void arch_domain_destroy(struct domain * #endif free_xenheap_page(d->shared_info); - xfree(d->arch.pirq_irq); - xfree(d->arch.pirq_emuirq); cleanup_domain_irq_mapping(d); } --- 2011-04-29.orig/xen/arch/x86/hvm/hvm.c +++ 2011-04-29/xen/arch/x86/hvm/hvm.c @@ -252,32 
+252,36 @@ void hvm_migrate_timers(struct vcpu *v) pt_migrate(v); } -void hvm_migrate_pirqs(struct vcpu *v) +static int hvm_migrate_pirq(struct domain *d, unsigned int pirq, + struct hvm_pirq_dpci *pirq_dpci, void *arg) { - int pirq, irq; - struct irq_desc *desc; - struct domain *d = v->domain; - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - - if ( !iommu_enabled || (hvm_irq_dpci == NULL) ) - return; + struct vcpu *v = arg; - spin_lock(&d->event_lock); - for ( pirq = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs); - pirq < d->nr_pirqs; - pirq = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, pirq + 1) ) + if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) && + (pirq_dpci->gmsi.dest_vcpu_id == v->vcpu_id) ) { - if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) || - (hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id != v->vcpu_id) ) - continue; - desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL); - if (!desc) - continue; - irq = desc - irq_desc; - ASSERT(MSI_IRQ(irq)); + struct irq_desc *desc = + pirq_spin_lock_irq_desc(d, dpci_pirq(pirq_dpci), NULL); + + if ( !desc ) + return 0; + ASSERT(MSI_IRQ(desc - irq_desc)); irq_set_affinity(desc, cpumask_of(v->processor)); spin_unlock_irq(&desc->lock); } + + return 0; +} + +void hvm_migrate_pirqs(struct vcpu *v) +{ + struct domain *d = v->domain; + + if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci ) + return; + + spin_lock(&d->event_lock); + pt_pirq_iterate(d, hvm_migrate_pirq, v); spin_unlock(&d->event_lock); } @@ -501,8 +505,6 @@ int hvm_domain_initialise(struct domain return rc; } -extern void msixtbl_pt_cleanup(struct domain *d); - void hvm_domain_relinquish_resources(struct domain *d) { hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq); --- 2011-04-29.orig/xen/arch/x86/hvm/irq.c +++ 2011-04-29/xen/arch/x86/hvm/irq.c @@ -33,7 +33,7 @@ static void assert_irq(struct domain *d, int pirq = domain_emuirq_to_pirq(d, ioapic_gsi); if ( pirq != IRQ_UNBOUND ) { - send_guest_pirq(d, pirq); 
+ send_guest_pirq(d, pirq_info(d, pirq)); return; } vioapic_irq_positive_edge(d, ioapic_gsi); --- 2011-04-29.orig/xen/arch/x86/hvm/vmsi.c +++ 2011-04-29/xen/arch/x86/hvm/vmsi.c @@ -65,11 +65,10 @@ static void vmsi_inj_irq( } } -int vmsi_deliver(struct domain *d, int pirq) +int vmsi_deliver(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci) { - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - uint32_t flags = hvm_irq_dpci->mirq[pirq].gmsi.gflags; - int vector = hvm_irq_dpci->mirq[pirq].gmsi.gvec; + uint32_t flags = pirq_dpci->gmsi.gflags; + int vector = pirq_dpci->gmsi.gvec; uint8_t dest = (uint8_t)flags; uint8_t dest_mode = !!(flags & VMSI_DM_MASK); uint8_t delivery_mode = (flags & VMSI_DELIV_MASK) >> GLFAGS_SHIFT_DELIV_MODE; @@ -82,11 +81,7 @@ int vmsi_deliver(struct domain *d, int p "vector=%x trig_mode=%x\n", dest, dest_mode, delivery_mode, vector, trig_mode); - if ( !( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI ) ) - { - gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq); - return 0; - } + ASSERT(pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI); switch ( delivery_mode ) { @@ -349,7 +344,7 @@ static void del_msixtbl_entry(struct msi call_rcu(&entry->rcu, free_msixtbl_entry); } -int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable) +int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable) { struct irq_desc *irq_desc; struct msi_desc *msi_desc; @@ -358,6 +353,7 @@ int msixtbl_pt_register(struct domain *d int r = -EINVAL; ASSERT(spin_is_locked(&pcidevs_lock)); + ASSERT(spin_is_locked(&d->event_lock)); /* * xmalloc() with irq_disabled causes the failure of check_lock() @@ -367,7 +363,7 @@ int msixtbl_pt_register(struct domain *d if ( !new_entry ) return -ENOMEM; - irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL); + irq_desc = pirq_spin_lock_irq_desc(d, pirq, NULL); if ( !irq_desc ) { xfree(new_entry); @@ -404,7 +400,7 @@ out: return r; } -void msixtbl_pt_unregister(struct domain *d, int pirq) 
+void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq) { struct irq_desc *irq_desc; struct msi_desc *msi_desc; @@ -412,8 +408,9 @@ void msixtbl_pt_unregister(struct domain struct msixtbl_entry *entry; ASSERT(spin_is_locked(&pcidevs_lock)); + ASSERT(spin_is_locked(&d->event_lock)); - irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL); + irq_desc = pirq_spin_lock_irq_desc(d, pirq, NULL); if ( !irq_desc ) return; @@ -447,7 +444,7 @@ found: spin_unlock_irq(&irq_desc->lock); } -void msixtbl_pt_cleanup(struct domain *d, int pirq) +void msixtbl_pt_cleanup(struct domain *d) { struct msixtbl_entry *entry, *temp; unsigned long flags; --- 2011-04-29.orig/xen/arch/x86/irq.c +++ 2011-04-29/xen/arch/x86/irq.c @@ -814,7 +814,7 @@ static void irq_guest_eoi_timer_fn(void { struct domain *d = action->guest[i]; unsigned int pirq = domain_irq_to_pirq(d, irq); - if ( test_and_clear_bit(pirq, d->pirq_mask) ) + if ( test_and_clear_bool(pirq_info(d, pirq)->masked) ) action->in_flight--; } } @@ -874,11 +874,12 @@ static void __do_IRQ_guest(int irq) for ( i = 0; i < action->nr_guests; i++ ) { - unsigned int pirq; + struct pirq *pirq; + d = action->guest[i]; - pirq = domain_irq_to_pirq(d, irq); + pirq = pirq_info(d, domain_irq_to_pirq(d, irq)); if ( (action->ack_type != ACKTYPE_NONE) && - !test_and_set_bit(pirq, d->pirq_mask) ) + !test_and_set_bool(pirq->masked) ) action->in_flight++; if ( hvm_do_IRQ_dpci(d, pirq) ) { @@ -950,28 +951,71 @@ struct irq_desc *domain_spin_lock_irq_de return desc; } -static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq) +/* + * Same with struct pirq already looked up, and d->event_lock already + * held (thus the PIRQ <-> IRQ mapping can't change under our feet). 
+ */ +struct irq_desc *pirq_spin_lock_irq_desc( + struct domain *d, const struct pirq *pirq, unsigned long *pflags) +{ + int irq = pirq->arch.irq; + struct irq_desc *desc; + unsigned long flags; + + ASSERT(spin_is_locked(&d->event_lock)); + + if ( irq <= 0 ) + return NULL; + + desc = irq_to_desc(irq); + spin_lock_irqsave(&desc->lock, flags); + + if ( pflags ) + *pflags = flags; + + ASSERT(pirq == pirq_info(d, domain_irq_to_pirq(d, irq))); + ASSERT(irq == pirq->arch.irq); + + return desc; +} + +static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq, + struct pirq **pinfo) { int err = radix_tree_insert(&d->arch.irq_pirq, irq, NULL, NULL, NULL); + struct pirq *info; - return err != -EEXIST ? err : 0; + if ( err && err != -EEXIST ) + return err; + info = pirq_get_info(d, pirq); + if ( !info ) + { + if ( !err ) + radix_tree_delete(&d->arch.irq_pirq, irq, NULL); + return -ENOMEM; + } + *pinfo = info; + return 0; } -static void set_domain_irq_pirq(struct domain *d, int irq, int pirq) +static void set_domain_irq_pirq(struct domain *d, int irq, int pirq, + struct pirq *info) { *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void *)(long)pirq; - d->arch.pirq_irq[pirq] = irq; + info->arch.irq = irq; } -static void clear_domain_irq_pirq(struct domain *d, int irq, int pirq) +static void clear_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq) { - d->arch.pirq_irq[pirq] = 0; + pirq->arch.irq = 0; *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = NULL; } -static void cleanup_domain_irq_pirq(struct domain *d, int irq, int pirq) +static void cleanup_domain_irq_pirq(struct domain *d, int irq, int pirq, + struct pirq *info) { + pirq_cleanup_check(info, d, pirq); radix_tree_delete(&d->arch.irq_pirq, irq, NULL); } @@ -987,10 +1031,12 @@ int init_domain_irq_mapping(struct domai for ( i = 1; platform_legacy_irq(i); ++i ) if ( !IO_APIC_IRQ(i) ) { - err = prepare_domain_irq_pirq(d, i, i); + struct pirq *info; + + err = prepare_domain_irq_pirq(d, i, i, &info); 
if ( err ) break; - set_domain_irq_pirq(d, i, i); + set_domain_irq_pirq(d, i, i, info); } return err; @@ -1008,6 +1054,48 @@ void cleanup_domain_irq_mapping(struct d irq_slot_free, NULL); } +struct pirq *alloc_pirq_struct(struct domain *d) +{ + size_t sz = is_hvm_domain(d) ? sizeof(struct pirq) : + offsetof(struct pirq, arch.hvm); + struct pirq *pirq = xmalloc_bytes(sz); + + if ( pirq ) + { + memset(pirq, 0, sz); + if ( is_hvm_domain(d) ) + { + pirq->arch.hvm.emuirq = IRQ_UNBOUND; + pt_pirq_init(d, &pirq->arch.hvm.dpci); + } + } + + return pirq; +} + +void (pirq_cleanup_check)(struct pirq *info, struct domain *d, int pirq) +{ + /* + * Check whether all fields have their default values, and delete + * the entry from the tree if so. + * + * NB: Common parts were already checked. + */ + if ( info->arch.irq ) + return; + + if ( is_hvm_domain(d) ) + { + if ( info->arch.hvm.emuirq != IRQ_UNBOUND ) + return; + if ( !pt_pirq_cleanup_check(&info->arch.hvm.dpci) ) + return; + } + + if ( radix_tree_delete(&d->pirq_tree, pirq, NULL) != info ) + BUG(); +} + /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. 
*/ static void flush_ready_eoi(void) { @@ -1068,18 +1156,22 @@ static void set_eoi_ready(void *data) flush_ready_eoi(); } -static void __pirq_guest_eoi(struct domain *d, int pirq) +void pirq_guest_eoi(struct domain *d, struct pirq *pirq) +{ + struct irq_desc *desc; + + ASSERT(local_irq_is_enabled()); + desc = pirq_spin_lock_irq_desc(d, pirq, NULL); + if ( desc ) + desc_guest_eoi(d, desc, pirq); +} + +void desc_guest_eoi(struct domain *d, struct irq_desc *desc, struct pirq *pirq) { - struct irq_desc *desc; irq_guest_action_t *action; cpumask_t cpu_eoi_map; int irq; - ASSERT(local_irq_is_enabled()); - desc = domain_spin_lock_irq_desc(d, pirq, NULL); - if ( desc == NULL ) - return; - if ( !(desc->status & IRQ_GUEST) ) { spin_unlock_irq(&desc->lock); @@ -1091,12 +1183,12 @@ static void __pirq_guest_eoi(struct doma if ( action->ack_type == ACKTYPE_NONE ) { - ASSERT(!test_bit(pirq, d->pirq_mask)); + ASSERT(!pirq->masked); stop_timer(&action->eoi_timer); _irq_guest_eoi(desc); } - if ( unlikely(!test_and_clear_bit(pirq, d->pirq_mask)) || + if ( unlikely(!test_and_clear_bool(pirq->masked)) || unlikely(--action->in_flight != 0) ) { spin_unlock_irq(&desc->lock); @@ -1131,27 +1223,23 @@ static void __pirq_guest_eoi(struct doma on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0); } -int pirq_guest_eoi(struct domain *d, int irq) -{ - if ( (irq < 0) || (irq >= d->nr_pirqs) ) - return -EINVAL; - - __pirq_guest_eoi(d, irq); - - return 0; -} - int pirq_guest_unmask(struct domain *d) { - unsigned int irq, nr = d->nr_pirqs; + unsigned int pirq = 0, n, i; + unsigned long indexes[16]; + struct pirq *pirqs[ARRAY_SIZE(indexes)]; - for ( irq = find_first_bit(d->pirq_mask, nr); - irq < nr; - irq = find_next_bit(d->pirq_mask, nr, irq+1) ) - { - if ( !test_bit(d->pirq_to_evtchn[irq], &shared_info(d, evtchn_mask)) ) - __pirq_guest_eoi(d, irq); - } + do { + n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq, + ARRAY_SIZE(pirqs), indexes); + for ( i = 0; i < n; ++i ) + { + pirq = 
indexes[i]; + if ( pirqs[i]->masked && + !test_bit(pirqs[i]->evtchn, &shared_info(d, evtchn_mask)) ) + pirq_guest_eoi(d, pirqs[i]); + } + } while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) ); return 0; } @@ -1221,7 +1309,7 @@ int pirq_shared(struct domain *d, int pi return shared; } -int pirq_guest_bind(struct vcpu *v, int pirq, int will_share) +int pirq_guest_bind(struct vcpu *v, int pirq, struct pirq *info, int will_share) { unsigned int irq; struct irq_desc *desc; @@ -1233,7 +1321,7 @@ int pirq_guest_bind(struct vcpu *v, int BUG_ON(!local_irq_is_enabled()); retry: - desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL); + desc = pirq_spin_lock_irq_desc(v->domain, info, NULL); if ( desc == NULL ) { rc = -EINVAL; @@ -1334,7 +1422,7 @@ int pirq_guest_bind(struct vcpu *v, int } static irq_guest_action_t *__pirq_guest_unbind( - struct domain *d, int pirq, struct irq_desc *desc) + struct domain *d, int pirq, struct pirq *info, struct irq_desc *desc) { unsigned int irq; irq_guest_action_t *action; @@ -1363,13 +1451,13 @@ static irq_guest_action_t *__pirq_guest_ switch ( action->ack_type ) { case ACKTYPE_UNMASK: - if ( test_and_clear_bit(pirq, d->pirq_mask) && + if ( test_and_clear_bool(info->masked) && (--action->in_flight == 0) ) desc->handler->end(irq); break; case ACKTYPE_EOI: /* NB. If #guests == 0 then we clear the eoi_map later on. */ - if ( test_and_clear_bit(pirq, d->pirq_mask) && + if ( test_and_clear_bool(info->masked) && (--action->in_flight == 0) && (action->nr_guests != 0) ) { @@ -1387,9 +1475,9 @@ static irq_guest_action_t *__pirq_guest_ /* * The guest cannot re-bind to this IRQ until this function returns. So, - * when we have flushed this IRQ from pirq_mask, it should remain flushed. + * when we have flushed this IRQ from ->masked, it should remain flushed. 
*/ - BUG_ON(test_bit(pirq, d->pirq_mask)); + BUG_ON(info->masked); if ( action->nr_guests != 0 ) return NULL; @@ -1427,7 +1515,7 @@ static irq_guest_action_t *__pirq_guest_ return action; } -void pirq_guest_unbind(struct domain *d, int pirq) +void pirq_guest_unbind(struct domain *d, int pirq, struct pirq *info) { irq_guest_action_t *oldaction = NULL; struct irq_desc *desc; @@ -1436,19 +1524,19 @@ void pirq_guest_unbind(struct domain *d, WARN_ON(!spin_is_locked(&d->event_lock)); BUG_ON(!local_irq_is_enabled()); - desc = domain_spin_lock_irq_desc(d, pirq, NULL); + desc = pirq_spin_lock_irq_desc(d, info, NULL); if ( desc == NULL ) { - irq = -domain_pirq_to_irq(d, pirq); + irq = -info->arch.irq; BUG_ON(irq <= 0); desc = irq_to_desc(irq); spin_lock_irq(&desc->lock); - clear_domain_irq_pirq(d, irq, pirq); + clear_domain_irq_pirq(d, irq, info); } else { - oldaction = __pirq_guest_unbind(d, pirq, desc); + oldaction = __pirq_guest_unbind(d, pirq, info, desc); } spin_unlock_irq(&desc->lock); @@ -1459,10 +1547,10 @@ void pirq_guest_unbind(struct domain *d, xfree(oldaction); } else if ( irq > 0 ) - cleanup_domain_irq_pirq(d, irq, pirq); + cleanup_domain_irq_pirq(d, irq, pirq, info); } -static int pirq_guest_force_unbind(struct domain *d, int irq) +static int pirq_guest_force_unbind(struct domain *d, int irq, struct pirq *info) { struct irq_desc *desc; irq_guest_action_t *action, *oldaction = NULL; @@ -1471,7 +1559,7 @@ static int pirq_guest_force_unbind(struc WARN_ON(!spin_is_locked(&d->event_lock)); BUG_ON(!local_irq_is_enabled()); - desc = domain_spin_lock_irq_desc(d, irq, NULL); + desc = pirq_spin_lock_irq_desc(d, info, NULL); BUG_ON(desc == NULL); if ( !(desc->status & IRQ_GUEST) ) @@ -1491,7 +1579,7 @@ static int pirq_guest_force_unbind(struc goto out; bound = 1; - oldaction = __pirq_guest_unbind(d, irq, desc); + oldaction = __pirq_guest_unbind(d, irq, info, desc); out: spin_unlock_irq(&desc->lock); @@ -1505,6 +1593,13 @@ static int pirq_guest_force_unbind(struc return 
bound; } +static inline bool_t is_free_pirq(const struct domain *d, + const struct pirq *pirq) +{ + return !pirq || (!pirq->arch.irq && (!is_hvm_domain(d) || + pirq->arch.hvm.emuirq == IRQ_UNBOUND)); +} + int get_free_pirq(struct domain *d, int type, int index) { int i; @@ -1514,29 +1609,17 @@ int get_free_pirq(struct domain *d, int if ( type == MAP_PIRQ_TYPE_GSI ) { for ( i = 16; i < nr_irqs_gsi; i++ ) - if ( !d->arch.pirq_irq[i] ) - { - if ( !is_hvm_domain(d) || - d->arch.pirq_emuirq[i] == IRQ_UNBOUND ) - break; - } - if ( i == nr_irqs_gsi ) - return -ENOSPC; + if ( is_free_pirq(d, pirq_info(d, i)) ) + return i; } else { for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; i-- ) - if ( !d->arch.pirq_irq[i] ) - { - if ( !is_hvm_domain(d) || - d->arch.pirq_emuirq[i] == IRQ_UNBOUND ) - break; - } - if ( i < nr_irqs_gsi ) - return -ENOSPC; + if ( is_free_pirq(d, pirq_info(d, i)) ) + return i; } - return i; + return -ENOSPC; } int map_domain_pirq( @@ -1544,6 +1627,7 @@ int map_domain_pirq( { int ret = 0; int old_irq, old_pirq; + struct pirq *info; struct irq_desc *desc; unsigned long flags; struct msi_desc *msi_desc; @@ -1583,7 +1667,7 @@ int map_domain_pirq( return ret; } - ret = prepare_domain_irq_pirq(d, irq, pirq); + ret = prepare_domain_irq_pirq(d, irq, pirq, &info); if ( ret ) return ret; @@ -1608,20 +1692,20 @@ int map_domain_pirq( dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n", d->domain_id, irq); desc->handler = &pci_msi_type; - set_domain_irq_pirq(d, irq, pirq); + set_domain_irq_pirq(d, irq, pirq, info); setup_msi_irq(pdev, msi_desc, irq); spin_unlock_irqrestore(&desc->lock, flags); } else { spin_lock_irqsave(&desc->lock, flags); - set_domain_irq_pirq(d, irq, pirq); + set_domain_irq_pirq(d, irq, pirq, info); spin_unlock_irqrestore(&desc->lock, flags); } done: if ( ret ) - cleanup_domain_irq_pirq(d, irq, pirq); + cleanup_domain_irq_pirq(d, irq, pirq, info); return ret; } @@ -1632,6 +1716,7 @@ int unmap_domain_pirq(struct domain *d, struct irq_desc *desc; int irq, ret 
= 0; bool_t forced_unbind; + struct pirq *info; struct msi_desc *msi_desc = NULL; if ( (pirq < 0) || (pirq >= d->nr_pirqs) ) @@ -1640,8 +1725,8 @@ int unmap_domain_pirq(struct domain *d, ASSERT(spin_is_locked(&pcidevs_lock)); ASSERT(spin_is_locked(&d->event_lock)); - irq = domain_pirq_to_irq(d, pirq); - if ( irq <= 0 ) + info = pirq_info(d, pirq); + if ( !info || (irq = info->arch.irq) <= 0 ) { dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n", d->domain_id, pirq); @@ -1649,7 +1734,7 @@ int unmap_domain_pirq(struct domain *d, goto done; } - forced_unbind = pirq_guest_force_unbind(d, pirq); + forced_unbind = pirq_guest_force_unbind(d, pirq, info); if ( forced_unbind ) dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n", d->domain_id, pirq); @@ -1664,10 +1749,10 @@ int unmap_domain_pirq(struct domain *d, BUG_ON(irq != domain_pirq_to_irq(d, pirq)); if ( !forced_unbind ) - clear_domain_irq_pirq(d, irq, pirq); + clear_domain_irq_pirq(d, irq, info); else { - d->arch.pirq_irq[pirq] = -irq; + info->arch.irq = -irq; *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void *)(long)-pirq; } @@ -1676,7 +1761,7 @@ int unmap_domain_pirq(struct domain *d, msi_free_irq(msi_desc); if ( !forced_unbind ) - cleanup_domain_irq_pirq(d, irq, pirq); + cleanup_domain_irq_pirq(d, irq, pirq, info); ret = irq_deny_access(d, pirq); if ( ret ) @@ -1698,7 +1783,7 @@ void free_domain_pirqs(struct domain *d) spin_lock(&d->event_lock); for ( i = 0; i < d->nr_pirqs; i++ ) - if ( d->arch.pirq_irq[i] > 0 ) + if ( domain_pirq_to_irq(d, i) > 0 ) unmap_domain_pirq(d, i); spin_unlock(&d->event_lock); @@ -1714,6 +1799,7 @@ static void dump_irqs(unsigned char key) struct irq_cfg *cfg; irq_guest_action_t *action; struct domain *d; + const struct pirq *info; unsigned long flags; printk("Guest interrupt information:\n"); @@ -1748,20 +1834,18 @@ static void dump_irqs(unsigned char key) { d = action->guest[i]; pirq = domain_irq_to_pirq(d, irq); + info = pirq_info(d, pirq); printk("%u:%3d(%c%c%c%c)", 
d->domain_id, pirq, - (test_bit(d->pirq_to_evtchn[pirq], + (test_bit(info->evtchn, &shared_info(d, evtchn_pending)) ? 'P' : '-'), - (test_bit(d->pirq_to_evtchn[pirq] / - BITS_PER_EVTCHN_WORD(d), + (test_bit(info->evtchn / BITS_PER_EVTCHN_WORD(d), &vcpu_info(d->vcpu[0], evtchn_pending_sel)) ? 'S' : '-'), - (test_bit(d->pirq_to_evtchn[pirq], - &shared_info(d, evtchn_mask)) ? + (test_bit(info->evtchn, &shared_info(d, evtchn_mask)) ? 'M' : '-'), - (test_bit(pirq, d->pirq_mask) ? - 'M' : '-')); + (info->masked ? 'M' : '-')); if ( i != action->nr_guests ) printk(","); } @@ -1868,6 +1952,7 @@ void fixup_irqs(void) int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq) { int old_emuirq = IRQ_UNBOUND, old_pirq = IRQ_UNBOUND; + struct pirq *info; ASSERT(spin_is_locked(&d->event_lock)); @@ -1894,6 +1979,10 @@ int map_domain_emuirq_pirq(struct domain return 0; } + info = pirq_get_info(d, pirq); + if ( !info ) + return -ENOMEM; + /* do not store emuirq mappings for pt devices */ if ( emuirq != IRQ_PT ) { @@ -1909,10 +1998,11 @@ int map_domain_emuirq_pirq(struct domain (void *)((long)pirq + 1); break; default: + pirq_cleanup_check(info, d, pirq); return err; } } - d->arch.pirq_emuirq[pirq] = emuirq; + info->arch.hvm.emuirq = emuirq; return 0; } @@ -1920,6 +2010,7 @@ int map_domain_emuirq_pirq(struct domain int unmap_domain_pirq_emuirq(struct domain *d, int pirq) { int emuirq, ret = 0; + struct pirq *info; if ( !is_hvm_domain(d) ) return -EINVAL; @@ -1938,7 +2029,12 @@ int unmap_domain_pirq_emuirq(struct doma goto done; } - d->arch.pirq_emuirq[pirq] = IRQ_UNBOUND; + info = pirq_info(d, pirq); + if ( info ) + { + info->arch.hvm.emuirq = IRQ_UNBOUND; + pirq_cleanup_check(info, d, pirq); + } if ( emuirq != IRQ_PT ) radix_tree_delete(&d->arch.hvm_domain.emuirq_pirq, emuirq, NULL); @@ -1946,16 +2042,9 @@ int unmap_domain_pirq_emuirq(struct doma return ret; } -int hvm_domain_use_pirq(struct domain *d, int pirq) +bool_t hvm_domain_use_pirq(const struct domain *d, const 
struct pirq *pirq) { - int emuirq; - - if ( !is_hvm_domain(d) ) - return 0; - - emuirq = domain_pirq_to_emuirq(d, pirq); - if ( emuirq != IRQ_UNBOUND && d->pirq_to_evtchn[pirq] != 0 ) - return 1; - else - return 0; + return is_hvm_domain(d) && + pirq->arch.hvm.emuirq != IRQ_UNBOUND && + pirq->evtchn != 0; } --- 2011-04-29.orig/xen/arch/x86/physdev.c +++ 2011-04-29/xen/arch/x86/physdev.c @@ -258,20 +258,28 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H { case PHYSDEVOP_eoi: { struct physdev_eoi eoi; + struct pirq *pirq; + ret = -EFAULT; if ( copy_from_guest(&eoi, arg, 1) != 0 ) break; ret = -EINVAL; if ( eoi.irq >= v->domain->nr_pirqs ) break; + spin_lock(&v->domain->event_lock); + pirq = pirq_info(v->domain, eoi.irq); + if ( !pirq ) { + spin_unlock(&v->domain->event_lock); + break; + } if ( !is_hvm_domain(v->domain) && v->domain->arch.pv_domain.pirq_eoi_map ) - evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]); + evtchn_unmask(pirq->evtchn); if ( !is_hvm_domain(v->domain) || - domain_pirq_to_emuirq(v->domain, eoi.irq) == IRQ_PT ) - ret = pirq_guest_eoi(v->domain, eoi.irq); - else - ret = 0; + pirq->arch.hvm.emuirq == IRQ_PT ) + pirq_guest_eoi(v->domain, pirq); + spin_unlock(&v->domain->event_lock); + ret = 0; break; } @@ -564,11 +572,23 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H break; spin_lock(&d->event_lock); - out.pirq = get_free_pirq(d, out.type, 0); - d->arch.pirq_irq[out.pirq] = PIRQ_ALLOCATED; + ret = get_free_pirq(d, out.type, 0); + if ( ret >= 0 ) + { + struct pirq *info = pirq_get_info(d, ret); + + if ( info ) + info->arch.irq = PIRQ_ALLOCATED; + else + ret = -ENOMEM; + } spin_unlock(&d->event_lock); - ret = copy_to_guest(arg, &out, 1) ? -EFAULT : 0; + if ( ret >= 0 ) + { + out.pirq = ret; + ret = copy_to_guest(arg, &out, 1) ? 
-EFAULT : 0; + } rcu_unlock_domain(d); break; --- 2011-04-29.orig/xen/common/domain.c +++ 2011-04-29/xen/common/domain.c @@ -290,13 +290,7 @@ struct domain *domain_create( if ( d->nr_pirqs > nr_irqs ) d->nr_pirqs = nr_irqs; - d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs); - d->pirq_mask = xmalloc_array( - unsigned long, BITS_TO_LONGS(d->nr_pirqs)); - if ( (d->pirq_to_evtchn == NULL) || (d->pirq_mask == NULL) ) - goto fail; - memset(d->pirq_to_evtchn, 0, d->nr_pirqs * sizeof(*d->pirq_to_evtchn)); - bitmap_zero(d->pirq_mask, d->nr_pirqs); + INIT_RADIX_TREE(&d->pirq_tree, 0); if ( evtchn_init(d) != 0 ) goto fail; @@ -346,6 +340,7 @@ struct domain *domain_create( { evtchn_destroy(d); evtchn_destroy_final(d); + radix_tree_destroy(&d->pirq_tree, free_pirq_struct, NULL); } if ( init_status & INIT_rangeset ) rangeset_domain_destroy(d); @@ -353,8 +348,6 @@ struct domain *domain_create( watchdog_domain_destroy(d); if ( init_status & INIT_xsm ) xsm_free_security_domain(d); - xfree(d->pirq_mask); - xfree(d->pirq_to_evtchn); free_cpumask_var(d->domain_dirty_cpumask); free_domain_struct(d); return NULL; @@ -680,8 +673,7 @@ static void complete_domain_destroy(stru evtchn_destroy_final(d); - xfree(d->pirq_mask); - xfree(d->pirq_to_evtchn); + radix_tree_destroy(&d->pirq_tree, free_pirq_struct, NULL); xsm_free_security_domain(d); free_cpumask_var(d->domain_dirty_cpumask); @@ -963,6 +955,20 @@ long vm_assist(struct domain *p, unsigne return -ENOSYS; } +struct pirq *pirq_get_info(struct domain *d, int pirq) +{ + struct pirq *info = pirq_info(d, pirq); + + if ( !info && (info = alloc_pirq_struct(d)) != NULL && + radix_tree_insert(&d->pirq_tree, pirq, info, NULL, NULL) ) + { + free_pirq_struct(info); + info = NULL; + } + + return info; +} + struct migrate_info { long (*func)(void *data); void *data; --- 2011-04-29.orig/xen/common/event_channel.c +++ 2011-04-29/xen/common/event_channel.c @@ -325,6 +325,7 @@ static long evtchn_bind_pirq(evtchn_bind struct evtchn *chn; struct domain 
*d = current->domain; struct vcpu *v = d->vcpu[0]; + struct pirq *info; int port, pirq = bind->pirq; long rc; @@ -336,7 +337,7 @@ static long evtchn_bind_pirq(evtchn_bind spin_lock(&d->event_lock); - if ( d->pirq_to_evtchn[pirq] != 0 ) + if ( pirq_to_evtchn(d, pirq) != 0 ) ERROR_EXIT(-EEXIST); if ( (port = get_free_port(d)) < 0 ) @@ -344,14 +345,18 @@ static long evtchn_bind_pirq(evtchn_bind chn = evtchn_from_port(d, port); - d->pirq_to_evtchn[pirq] = port; + info = pirq_get_info(d, pirq); + if ( !info ) + ERROR_EXIT(-ENOMEM); + info->evtchn = port; rc = (!is_hvm_domain(d) - ? pirq_guest_bind( - v, pirq, !!(bind->flags & BIND_PIRQ__WILL_SHARE)) + ? pirq_guest_bind(v, pirq, info, + !!(bind->flags & BIND_PIRQ__WILL_SHARE)) : 0); if ( rc != 0 ) { - d->pirq_to_evtchn[pirq] = 0; + info->evtchn = 0; + pirq_cleanup_check(info, d, pirq); goto out; } @@ -404,12 +409,18 @@ static long __evtchn_close(struct domain case ECS_UNBOUND: break; - case ECS_PIRQ: + case ECS_PIRQ: { + struct pirq *pirq = pirq_info(d1, chn1->u.pirq.irq); + + if ( !pirq ) + break; if ( !is_hvm_domain(d1) ) - pirq_guest_unbind(d1, chn1->u.pirq.irq); - d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0; + pirq_guest_unbind(d1, chn1->u.pirq.irq, pirq); + pirq->evtchn = 0; + pirq_cleanup_check(pirq, d1, chn1->u.pirq.irq); unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]); break; + } case ECS_VIRQ: for_each_vcpu ( d1, v ) @@ -659,9 +670,9 @@ void send_guest_global_virq(struct domai spin_unlock_irqrestore(&v->virq_lock, flags); } -int send_guest_pirq(struct domain *d, int pirq) +int send_guest_pirq(struct domain *d, const struct pirq *pirq) { - int port = d->pirq_to_evtchn[pirq]; + int port; struct evtchn *chn; /* @@ -670,7 +681,7 @@ int send_guest_pirq(struct domain *d, in * HVM guests: Port is legitimately zero when the guest disables the * emulated interrupt/evtchn. 
*/ - if ( port == 0 ) + if ( pirq == NULL || (port = pirq->evtchn) == 0 ) { BUG_ON(!is_hvm_domain(d)); return 0; @@ -812,13 +823,10 @@ int evtchn_unmask(unsigned int port) struct domain *d = current->domain; struct vcpu *v; - spin_lock(&d->event_lock); + ASSERT(spin_is_locked(&d->event_lock)); if ( unlikely(!port_is_valid(d, port)) ) - { - spin_unlock(&d->event_lock); return -EINVAL; - } v = d->vcpu[evtchn_from_port(d, port)->notify_vcpu_id]; @@ -834,8 +842,6 @@ int evtchn_unmask(unsigned int port) vcpu_mark_events_pending(v); } - spin_unlock(&d->event_lock); - return 0; } @@ -960,7 +966,9 @@ long do_event_channel_op(int cmd, XEN_GU struct evtchn_unmask unmask; if ( copy_from_guest(&unmask, arg, 1) != 0 ) return -EFAULT; + spin_lock(&current->domain->event_lock); rc = evtchn_unmask(unmask.port); + spin_unlock(&current->domain->event_lock); break; } --- 2011-04-29.orig/xen/common/radix-tree.c +++ 2011-04-29/xen/common/radix-tree.c @@ -225,7 +225,8 @@ EXPORT_SYMBOL(radix_tree_lookup); static unsigned int __lookup(struct radix_tree_root *root, void **results, unsigned long index, - unsigned int max_items, unsigned long *next_index) + unsigned int max_items, unsigned long *indexes, + unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; @@ -235,8 +236,11 @@ __lookup(struct radix_tree_root *root, v height = root->height; if (index > radix_tree_maxindex(height)) if (height == 0) { - if (root->rnode && index == 0) + if (root->rnode && index == 0) { + if (indexes) + indexes[nr_found] = index; results[nr_found++] = root->rnode; + } goto out; } @@ -265,6 +269,8 @@ __lookup(struct radix_tree_root *root, v for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { index++; if (slot->slots[i]) { + if (indexes) + indexes[nr_found] = index - 1; results[nr_found++] = slot->slots[i]; if (nr_found == max_items) goto out; @@ -281,6 +287,7 @@ __lookup(struct radix_tree_root *root, v * @results: where the results of the lookup are placed * @first_index:
start the lookup from this key * @max_items: place up to this many items at *results + * @indexes: (optional) array to store indexes of items. * * Performs an index-ascending scan of the tree for present items. Places * them at *@results and returns the number of items which were placed at @@ -290,7 +297,8 @@ __lookup(struct radix_tree_root *root, v */ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items) + unsigned long first_index, unsigned int max_items, + unsigned long *indexes) { const unsigned long max_index = radix_tree_maxindex(root->height); unsigned long cur_index = first_index; @@ -303,7 +311,7 @@ radix_tree_gang_lookup(struct radix_tree if (cur_index > max_index) break; nr_found = __lookup(root, results + ret, cur_index, - max_items - ret, &next_index); + max_items - ret, indexes + ret, &next_index); ret += nr_found; if (next_index == 0) break; --- 2011-04-29.orig/xen/drivers/passthrough/io.c +++ 2011-04-29/xen/drivers/passthrough/io.c @@ -35,18 +35,28 @@ bool_t pt_irq_need_timer(uint32_t flags) return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE)); } +static int pt_irq_guest_eoi(struct domain *d, unsigned int pirq, + struct hvm_pirq_dpci *pirq_dpci, void *arg) +{ + if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT, + &pirq_dpci->flags) ) + { + pirq_dpci->masked = 0; + pirq_dpci->pending = 0; + pirq_guest_eoi(d, dpci_pirq(pirq_dpci)); + } + + return 0; +} + static void pt_irq_time_out(void *data) { - struct hvm_mirq_dpci_mapping *irq_map = data; - unsigned int guest_gsi, machine_gsi = 0; + struct hvm_pirq_dpci *irq_map = data; + unsigned int guest_gsi; struct hvm_irq_dpci *dpci = NULL; struct dev_intx_gsi_link *digl; struct hvm_girq_dpci_mapping *girq; uint32_t device, intx; - unsigned int nr_pirqs = irq_map->dom->nr_pirqs; - DECLARE_BITMAP(machine_gsi_map, nr_pirqs); - - bitmap_zero(machine_gsi_map, nr_pirqs); spin_lock(&irq_map->dom->event_lock); @@ 
-57,32 +67,18 @@ static void pt_irq_time_out(void *data) guest_gsi = digl->gsi; list_for_each_entry ( girq, &dpci->girq[guest_gsi], list ) { - machine_gsi = girq->machine_gsi; - set_bit(machine_gsi, machine_gsi_map); + struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi); + + pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH; } device = digl->device; intx = digl->intx; hvm_pci_intx_deassert(irq_map->dom, device, intx); } - for ( machine_gsi = find_first_bit(machine_gsi_map, nr_pirqs); - machine_gsi < nr_pirqs; - machine_gsi = find_next_bit(machine_gsi_map, nr_pirqs, - machine_gsi + 1) ) - { - clear_bit(machine_gsi, dpci->dirq_mask); - dpci->mirq[machine_gsi].pending = 0; - } + pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL); spin_unlock(&irq_map->dom->event_lock); - - for ( machine_gsi = find_first_bit(machine_gsi_map, nr_pirqs); - machine_gsi < nr_pirqs; - machine_gsi = find_next_bit(machine_gsi_map, nr_pirqs, - machine_gsi + 1) ) - { - pirq_guest_eoi(irq_map->dom, machine_gsi); - } } struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d) @@ -95,10 +91,6 @@ struct hvm_irq_dpci *domain_get_irq_dpci void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci) { - xfree(dpci->mirq); - xfree(dpci->dirq_mask); - xfree(dpci->mapping); - xfree(dpci->hvm_timer); xfree(dpci); } @@ -106,7 +98,9 @@ int pt_irq_create_bind_vtd( struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind) { struct hvm_irq_dpci *hvm_irq_dpci = NULL; - uint32_t machine_gsi, guest_gsi; + struct hvm_pirq_dpci *pirq_dpci; + struct pirq *info; + uint32_t guest_gsi; uint32_t device, intx, link; struct dev_intx_gsi_link *digl; struct hvm_girq_dpci_mapping *girq; @@ -129,63 +123,45 @@ int pt_irq_create_bind_vtd( memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci)); tasklet_init(&hvm_irq_dpci->dirq_tasklet, hvm_dirq_assist, (unsigned long)d); - hvm_irq_dpci->mirq = xmalloc_array(struct hvm_mirq_dpci_mapping, - d->nr_pirqs); - hvm_irq_dpci->dirq_mask = xmalloc_array(unsigned long, - 
BITS_TO_LONGS(d->nr_pirqs)); - hvm_irq_dpci->mapping = xmalloc_array(unsigned long, - BITS_TO_LONGS(d->nr_pirqs)); - hvm_irq_dpci->hvm_timer = xmalloc_array(struct timer, d->nr_pirqs); - if ( !hvm_irq_dpci->mirq || - !hvm_irq_dpci->dirq_mask || - !hvm_irq_dpci->mapping || - !hvm_irq_dpci->hvm_timer) - { - spin_unlock(&d->event_lock); - free_hvm_irq_dpci(hvm_irq_dpci); - return -ENOMEM; - } - memset(hvm_irq_dpci->mirq, 0, - d->nr_pirqs * sizeof(*hvm_irq_dpci->mirq)); - bitmap_zero(hvm_irq_dpci->dirq_mask, d->nr_pirqs); - bitmap_zero(hvm_irq_dpci->mapping, d->nr_pirqs); - memset(hvm_irq_dpci->hvm_timer, 0, - d->nr_pirqs * sizeof(*hvm_irq_dpci->hvm_timer)); - for ( int i = 0; i < d->nr_pirqs; i++ ) { - INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list); - hvm_irq_dpci->mirq[i].gmsi.dest_vcpu_id = -1; - } for ( int i = 0; i < NR_HVM_IRQS; i++ ) INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]); d->arch.hvm_domain.irq.dpci = hvm_irq_dpci; } + info = pirq_get_info(d, pirq); + if ( !info ) + { + spin_unlock(&d->event_lock); + return -ENOMEM; + } + pirq_dpci = pirq_dpci(info); + if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI ) { uint8_t dest, dest_mode; int dest_vcpu_id; - if ( !test_and_set_bit(pirq, hvm_irq_dpci->mapping)) + if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) { - hvm_irq_dpci->mirq[pirq].flags = HVM_IRQ_DPCI_MACH_MSI | - HVM_IRQ_DPCI_GUEST_MSI; - hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec; - hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags; + pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI | + HVM_IRQ_DPCI_GUEST_MSI; + pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec; + pirq_dpci->gmsi.gflags = pt_irq_bind->u.msi.gflags; /* bind after hvm_irq_dpci is setup to avoid race with irq handler*/ - rc = pirq_guest_bind(d->vcpu[0], pirq, 0); + rc = pirq_guest_bind(d->vcpu[0], pirq, info, 0); if ( rc == 0 && pt_irq_bind->u.msi.gtable ) { - rc = msixtbl_pt_register(d, pirq, pt_irq_bind->u.msi.gtable); + rc = msixtbl_pt_register(d, 
info, pt_irq_bind->u.msi.gtable); if ( unlikely(rc) ) - pirq_guest_unbind(d, pirq); + pirq_guest_unbind(d, pirq, info); } if ( unlikely(rc) ) { - hvm_irq_dpci->mirq[pirq].gmsi.gflags = 0; - hvm_irq_dpci->mirq[pirq].gmsi.gvec = 0; - hvm_irq_dpci->mirq[pirq].flags = 0; - clear_bit(pirq, hvm_irq_dpci->mapping); + pirq_dpci->gmsi.gflags = 0; + pirq_dpci->gmsi.gvec = 0; + pirq_dpci->flags = 0; + pirq_cleanup_check(info, d, pirq); spin_unlock(&d->event_lock); return rc; } @@ -194,34 +170,33 @@ int pt_irq_create_bind_vtd( { uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI; - if ( (hvm_irq_dpci->mirq[pirq].flags & mask) != mask) + if ( (pirq_dpci->flags & mask) != mask) { spin_unlock(&d->event_lock); return -EBUSY; } /* if pirq is already mapped as vmsi, update the guest data/addr */ - if ( hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec || - hvm_irq_dpci->mirq[pirq].gmsi.gflags != pt_irq_bind->u.msi.gflags) { + if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec || + pirq_dpci->gmsi.gflags != pt_irq_bind->u.msi.gflags) { /* Directly clear pending EOIs before enabling new MSI info. 
*/ - pirq_guest_eoi(d, pirq); + pirq_guest_eoi(d, info); - hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec; - hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags; + pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec; + pirq_dpci->gmsi.gflags = pt_irq_bind->u.msi.gflags; } } /* Caculate dest_vcpu_id for MSI-type pirq migration */ - dest = hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DEST_ID_MASK; - dest_mode = !!(hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DM_MASK); + dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK; + dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK); dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode); - hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id = dest_vcpu_id; + pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id; spin_unlock(&d->event_lock); if ( dest_vcpu_id >= 0 ) hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]); } else { - machine_gsi = pt_irq_bind->machine_irq; device = pt_irq_bind->u.pci.device; intx = pt_irq_bind->u.pci.intx; guest_gsi = hvm_pci_intx_gsi(device, intx); @@ -247,50 +222,51 @@ int pt_irq_create_bind_vtd( digl->intx = intx; digl->gsi = guest_gsi; digl->link = link; - list_add_tail(&digl->list, - &hvm_irq_dpci->mirq[machine_gsi].digl_list); + list_add_tail(&digl->list, &pirq_dpci->digl_list); girq->device = device; girq->intx = intx; - girq->machine_gsi = machine_gsi; + girq->machine_gsi = pirq; list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]); /* Bind the same mirq once in the same domain */ - if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping)) + if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) { unsigned int share; - hvm_irq_dpci->mirq[machine_gsi].dom = d; + pirq_dpci->dom = d; if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE ) { - hvm_irq_dpci->mirq[machine_gsi].flags = HVM_IRQ_DPCI_MACH_MSI | - HVM_IRQ_DPCI_GUEST_PCI | - HVM_IRQ_DPCI_TRANSLATE; + pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | + HVM_IRQ_DPCI_MACH_MSI | + HVM_IRQ_DPCI_GUEST_PCI | + HVM_IRQ_DPCI_TRANSLATE; share = 0; } 
else /* PT_IRQ_TYPE_PCI */ { - hvm_irq_dpci->mirq[machine_gsi].flags = HVM_IRQ_DPCI_MACH_PCI | - HVM_IRQ_DPCI_GUEST_PCI; + pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | + HVM_IRQ_DPCI_MACH_PCI | + HVM_IRQ_DPCI_GUEST_PCI; share = BIND_PIRQ__WILL_SHARE; } /* Init timer before binding */ - if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) ) - init_timer(&hvm_irq_dpci->hvm_timer[machine_gsi], - pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0); + if ( pt_irq_need_timer(pirq_dpci->flags) ) + init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0); /* Deal with gsi for legacy devices */ - rc = pirq_guest_bind(d->vcpu[0], machine_gsi, share); + rc = pirq_guest_bind(d->vcpu[0], pirq, info, share); if ( unlikely(rc) ) { - if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) ) - kill_timer(&hvm_irq_dpci->hvm_timer[machine_gsi]); - hvm_irq_dpci->mirq[machine_gsi].dom = NULL; - clear_bit(machine_gsi, hvm_irq_dpci->mapping); + if ( pt_irq_need_timer(pirq_dpci->flags) ) + kill_timer(&pirq_dpci->timer); + pirq_dpci->dom = NULL; list_del(&girq->list); xfree(girq); list_del(&digl->list); hvm_irq_dpci->link_cnt[link]--; + pirq_dpci->flags = 0; + pirq_cleanup_check(info, d, pirq); spin_unlock(&d->event_lock); xfree(digl); return rc; @@ -302,7 +278,7 @@ int pt_irq_create_bind_vtd( if ( iommu_verbose ) dprintk(VTDPREFIX, "d%d: bind: m_gsi=%u g_gsi=%u device=%u intx=%u\n", - d->domain_id, machine_gsi, guest_gsi, device, intx); + d->domain_id, pirq, guest_gsi, device, intx); } return 0; } @@ -311,11 +287,12 @@ int pt_irq_destroy_bind_vtd( struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind) { struct hvm_irq_dpci *hvm_irq_dpci = NULL; + struct hvm_pirq_dpci *pirq_dpci; uint32_t machine_gsi, guest_gsi; uint32_t device, intx, link; - struct list_head *digl_list, *tmp; - struct dev_intx_gsi_link *digl; + struct dev_intx_gsi_link *digl, *tmp; struct hvm_girq_dpci_mapping *girq; + struct pirq *pirq; machine_gsi = pt_irq_bind->machine_irq; device = 
pt_irq_bind->u.pci.device; @@ -350,14 +327,14 @@ int pt_irq_destroy_bind_vtd( } } + pirq = pirq_info(d, machine_gsi); + pirq_dpci = pirq_dpci(pirq); + /* clear the mirq info */ - if ( test_bit(machine_gsi, hvm_irq_dpci->mapping)) + if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) { - list_for_each_safe ( digl_list, tmp, - &hvm_irq_dpci->mirq[machine_gsi].digl_list ) + list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list ) { - digl = list_entry(digl_list, - struct dev_intx_gsi_link, list); if ( digl->device == device && digl->intx == intx && digl->link == link && @@ -368,15 +345,15 @@ int pt_irq_destroy_bind_vtd( } } - if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) ) + if ( list_empty(&pirq_dpci->digl_list) ) { - pirq_guest_unbind(d, machine_gsi); - msixtbl_pt_unregister(d, machine_gsi); - if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) ) - kill_timer(&hvm_irq_dpci->hvm_timer[machine_gsi]); - hvm_irq_dpci->mirq[machine_gsi].dom = NULL; - hvm_irq_dpci->mirq[machine_gsi].flags = 0; - clear_bit(machine_gsi, hvm_irq_dpci->mapping); + pirq_guest_unbind(d, machine_gsi, pirq); + msixtbl_pt_unregister(d, pirq); + if ( pt_irq_need_timer(pirq_dpci->flags) ) + kill_timer(&pirq_dpci->timer); + pirq_dpci->dom = NULL; + pirq_dpci->flags = 0; + pirq_cleanup_check(pirq, d, machine_gsi); } } spin_unlock(&d->event_lock); @@ -389,120 +366,156 @@ int pt_irq_destroy_bind_vtd( return 0; } -int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq) +void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci) +{ + INIT_LIST_HEAD(&dpci->digl_list); + dpci->gmsi.dest_vcpu_id = -1; +} + +bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci) +{ + return !dpci->flags; +} + +int pt_pirq_iterate(struct domain *d, + int (*cb)(struct domain *, unsigned int, + struct hvm_pirq_dpci *, void *), + void *arg) +{ + int rc = 0; + unsigned int pirq = 0, n, i; + unsigned long indexes[8]; + struct pirq *pirqs[ARRAY_SIZE(indexes)]; + + 
ASSERT(spin_is_locked(&d->event_lock)); + + do { + n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq, + ARRAY_SIZE(pirqs), indexes); + for ( i = 0; i < n; ++i ) + { + struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]); + + pirq = indexes[i]; + if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) + rc = cb(d, pirq, pirq_dpci, arg); + } + } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) ); + + return rc; +} + +int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq) { struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d); + struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq); - ASSERT(spin_is_locked(&irq_desc[domain_pirq_to_irq(d, mirq)].lock)); - if ( !iommu_enabled || !dpci || !test_bit(mirq, dpci->mapping)) + if ( !iommu_enabled || !dpci || !pirq_dpci || + !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) return 0; - set_bit(mirq, dpci->dirq_mask); + pirq_dpci->masked = 1; tasklet_schedule(&dpci->dirq_tasklet); return 1; } #ifdef SUPPORT_MSI_REMAPPING /* called with d->event_lock held */ -static void __msi_pirq_eoi(struct domain *d, int pirq) +static void __msi_pirq_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci) { - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; irq_desc_t *desc; - if ( ( pirq >= 0 ) && ( pirq < d->nr_pirqs ) && - test_bit(pirq, hvm_irq_dpci->mapping) && - ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) ) + if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) && + (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) ) { + struct pirq *pirq = dpci_pirq(pirq_dpci); + BUG_ON(!local_irq_is_enabled()); - desc = domain_spin_lock_irq_desc(d, pirq, NULL); + desc = pirq_spin_lock_irq_desc(d, pirq, NULL); if ( !desc ) return; desc->status &= ~IRQ_INPROGRESS; - spin_unlock_irq(&desc->lock); + desc_guest_eoi(d, desc, pirq); + } +} - pirq_guest_eoi(d, pirq); +static int _hvm_dpci_msi_eoi(struct domain *d, unsigned int pirq, + struct hvm_pirq_dpci *pirq_dpci, void *arg) +{ + int vector = (long)arg; + + if ( (pirq_dpci->flags & 
HVM_IRQ_DPCI_MACH_MSI) && + (pirq_dpci->gmsi.gvec == vector) ) + { + int dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK; + int dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK); + + if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest, + dest_mode) ) + { + __msi_pirq_eoi(d, pirq_dpci); + return 1; + } } + + return 0; } void hvm_dpci_msi_eoi(struct domain *d, int vector) { - int pirq, dest, dest_mode; - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - - if ( !iommu_enabled || (hvm_irq_dpci == NULL) ) + if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci ) return; spin_lock(&d->event_lock); - for ( pirq = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs); - pirq < d->nr_pirqs; - pirq = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, pirq + 1) ) - { - if ( (!(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI)) || - (hvm_irq_dpci->mirq[pirq].gmsi.gvec != vector) ) - continue; - - dest = hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DEST_ID_MASK; - dest_mode = !!(hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DM_MASK); - if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest, dest_mode) ) - break; - } - if ( pirq < d->nr_pirqs ) - __msi_pirq_eoi(d, pirq); + pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector); spin_unlock(&d->event_lock); } -extern int vmsi_deliver(struct domain *d, int pirq); -static int hvm_pci_msi_assert(struct domain *d, int pirq) +static int hvm_pci_msi_assert(struct domain *d, + struct hvm_pirq_dpci *pirq_dpci) { + struct pirq *pirq = dpci_pirq(pirq_dpci); + if ( hvm_domain_use_pirq(d, pirq) ) return send_guest_pirq(d, pirq); else - return vmsi_deliver(d, pirq); + return vmsi_deliver(d, pirq_dpci); } #endif -static void hvm_dirq_assist(unsigned long _d) +static int _hvm_dirq_assist(struct domain *d, unsigned int pirq, + struct hvm_pirq_dpci *pirq_dpci, void *arg) { - unsigned int pirq; uint32_t device, intx; - struct domain *d = (struct domain *)_d; - struct hvm_irq_dpci *hvm_irq_dpci = 
d->arch.hvm_domain.irq.dpci; struct dev_intx_gsi_link *digl; - ASSERT(hvm_irq_dpci); - - for ( pirq = find_first_bit(hvm_irq_dpci->dirq_mask, d->nr_pirqs); - pirq < d->nr_pirqs; - pirq = find_next_bit(hvm_irq_dpci->dirq_mask, d->nr_pirqs, pirq + 1) ) + if ( test_and_clear_bool(pirq_dpci->masked) ) { - if ( !test_and_clear_bit(pirq, hvm_irq_dpci->dirq_mask) ) - continue; - - spin_lock(&d->event_lock); #ifdef SUPPORT_MSI_REMAPPING - if ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI ) + if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI ) { - hvm_pci_msi_assert(d, pirq); - spin_unlock(&d->event_lock); - continue; + hvm_pci_msi_assert(d, pirq_dpci); + return 0; } #endif - list_for_each_entry ( digl, &hvm_irq_dpci->mirq[pirq].digl_list, list ) + list_for_each_entry ( digl, &pirq_dpci->digl_list, list ) { + struct pirq *info = dpci_pirq(pirq_dpci); + device = digl->device; intx = digl->intx; - if ( hvm_domain_use_pirq(d, pirq) ) - send_guest_pirq(d, pirq); + if ( hvm_domain_use_pirq(d, info) ) + send_guest_pirq(d, info); else hvm_pci_intx_assert(d, device, intx); - hvm_irq_dpci->mirq[pirq].pending++; + pirq_dpci->pending++; #ifdef SUPPORT_MSI_REMAPPING - if ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_TRANSLATE ) + if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE ) { /* for translated MSI to INTx interrupt, eoi as early as possible */ - __msi_pirq_eoi(d, pirq); + __msi_pirq_eoi(d, pirq_dpci); } #endif } @@ -514,37 +527,50 @@ static void hvm_dirq_assist(unsigned lon * guest will never deal with the irq, then the physical interrupt line * will never be deasserted. 
*/ - if ( pt_irq_need_timer(hvm_irq_dpci->mirq[pirq].flags) ) - set_timer(&hvm_irq_dpci->hvm_timer[pirq], - NOW() + PT_IRQ_TIME_OUT); - spin_unlock(&d->event_lock); + if ( pt_irq_need_timer(pirq_dpci->flags) ) + set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT); } + + return 0; +} + +static void hvm_dirq_assist(unsigned long _d) +{ + struct domain *d = (struct domain *)_d; + + ASSERT(d->arch.hvm_domain.irq.dpci); + + spin_lock(&d->event_lock); + pt_pirq_iterate(d, _hvm_dirq_assist, NULL); + spin_unlock(&d->event_lock); } static void __hvm_dpci_eoi(struct domain *d, - struct hvm_irq_dpci *hvm_irq_dpci, struct hvm_girq_dpci_mapping *girq, union vioapic_redir_entry *ent) { - uint32_t device, intx, machine_gsi; + uint32_t device, intx; + struct pirq *pirq; + struct hvm_pirq_dpci *pirq_dpci; device = girq->device; intx = girq->intx; hvm_pci_intx_deassert(d, device, intx); - machine_gsi = girq->machine_gsi; + pirq = pirq_info(d, girq->machine_gsi); + pirq_dpci = pirq_dpci(pirq); /* * No need to get vector lock for timer * since interrupt is still not EOIed */ - if ( --hvm_irq_dpci->mirq[machine_gsi].pending || + if ( --pirq_dpci->pending || ( ent && ent->fields.mask ) || - ! pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) ) + ! 
pt_irq_need_timer(pirq_dpci->flags) ) return; - stop_timer(&hvm_irq_dpci->hvm_timer[machine_gsi]); - pirq_guest_eoi(d, machine_gsi); + stop_timer(&pirq_dpci->timer); + pirq_guest_eoi(d, pirq); } void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi, @@ -569,7 +595,7 @@ void hvm_dpci_eoi(struct domain *d, unsi goto unlock; list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list ) - __hvm_dpci_eoi(d, hvm_irq_dpci, girq, ent); + __hvm_dpci_eoi(d, girq, ent); unlock: spin_unlock(&d->event_lock); --- 2011-04-29.orig/xen/drivers/passthrough/pci.c +++ 2011-04-29/xen/drivers/passthrough/pci.c @@ -236,12 +236,28 @@ out: return ret; } +static int pci_clean_dpci_irq(struct domain *d, unsigned int pirq, + struct hvm_pirq_dpci *pirq_dpci, void *arg) +{ + struct dev_intx_gsi_link *digl, *tmp; + + pirq_guest_unbind(d, pirq, dpci_pirq(pirq_dpci)); + + if ( pt_irq_need_timer(pirq_dpci->flags) ) + kill_timer(&pirq_dpci->timer); + + list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list ) + { + list_del(&digl->list); + xfree(digl); + } + + return 0; +} + static void pci_clean_dpci_irqs(struct domain *d) { struct hvm_irq_dpci *hvm_irq_dpci = NULL; - uint32_t i; - struct list_head *digl_list, *tmp; - struct dev_intx_gsi_link *digl; if ( !iommu_enabled ) return; @@ -255,24 +271,7 @@ static void pci_clean_dpci_irqs(struct d { tasklet_kill(&hvm_irq_dpci->dirq_tasklet); - for ( i = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs); - i < d->nr_pirqs; - i = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, i + 1) ) - { - pirq_guest_unbind(d, i); - - if ( pt_irq_need_timer(hvm_irq_dpci->mirq[i].flags) ) - kill_timer(&hvm_irq_dpci->hvm_timer[i]); - - list_for_each_safe ( digl_list, tmp, - &hvm_irq_dpci->mirq[i].digl_list ) - { - digl = list_entry(digl_list, - struct dev_intx_gsi_link, list); - list_del(&digl->list); - xfree(digl); - } - } + pt_pirq_iterate(d, pci_clean_dpci_irq, NULL); d->arch.hvm_domain.irq.dpci = NULL; free_hvm_irq_dpci(hvm_irq_dpci); --- 
2011-04-29.orig/xen/drivers/passthrough/vtd/x86/vtd.c +++ 2011-04-29/xen/drivers/passthrough/vtd/x86/vtd.c @@ -68,12 +68,32 @@ void *__init map_to_nocache_virt(int nr_ return (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus); } -void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq) +static int _hvm_dpci_isairq_eoi(struct domain *d, unsigned int pirq, + struct hvm_pirq_dpci *pirq_dpci, void *arg) { struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; - struct hvm_irq_dpci *dpci = NULL; + unsigned int isairq = (long)arg; struct dev_intx_gsi_link *digl, *tmp; - int i; + + list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list ) + { + if ( hvm_irq->pci_link.route[digl->link] == isairq ) + { + hvm_pci_intx_deassert(d, digl->device, digl->intx); + if ( --pirq_dpci->pending == 0 ) + { + stop_timer(&pirq_dpci->timer); + pirq_guest_eoi(d, dpci_pirq(pirq_dpci)); + } + } + } + + return 0; +} + +void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq) +{ + struct hvm_irq_dpci *dpci = NULL; ASSERT(isairq < NR_ISAIRQS); if ( !iommu_enabled) @@ -83,29 +103,10 @@ void hvm_dpci_isairq_eoi(struct domain * dpci = domain_get_irq_dpci(d); - if ( !dpci || !test_bit(isairq, dpci->isairq_map) ) + if ( dpci && test_bit(isairq, dpci->isairq_map) ) { - spin_unlock(&d->event_lock); - return; - } - /* Multiple mirq may be mapped to one isa irq */ - for ( i = find_first_bit(dpci->mapping, d->nr_pirqs); - i < d->nr_pirqs; - i = find_next_bit(dpci->mapping, d->nr_pirqs, i + 1) ) - { - list_for_each_entry_safe ( digl, tmp, - &dpci->mirq[i].digl_list, list ) - { - if ( hvm_irq->pci_link.route[digl->link] == isairq ) - { - hvm_pci_intx_deassert(d, digl->device, digl->intx); - if ( --dpci->mirq[i].pending == 0 ) - { - stop_timer(&dpci->hvm_timer[i]); - pirq_guest_eoi(d, i); - } - } - } + /* Multiple mirq may be mapped to one isa irq */ + pt_pirq_iterate(d, _hvm_dpci_isairq_eoi, (void *)(long)isairq); } spin_unlock(&d->event_lock); } --- 
2011-04-29.orig/xen/include/asm-ia64/domain.h +++ 2011-04-29/xen/include/asm-ia64/domain.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -316,6 +317,23 @@ struct arch_vcpu { cpumask_t cache_coherent_map; }; +struct arch_pirq { + struct hvm_pirq_dpci dpci; +}; + +#define pirq_dpci(pirq) ((pirq) ? &(pirq)->arch.dpci : NULL) +#define dpci_pirq(dpci) container_of(dpci, struct pirq, arch.dpci) + +#define alloc_pirq_struct(d) ({ \ + struct pirq *pirq = xmalloc(struct pirq); \ + if ( pirq ) \ + { \ + memset(pirq, 0, sizeof(*pirq)); \ + pt_pirq_init(d, &pirq->arch.dpci); \ + } \ + pirq; \ +}) + #include /* for KERNEL_DS */ #include --- 2011-04-29.orig/xen/include/asm-x86/domain.h +++ 2011-04-29/xen/include/asm-x86/domain.h @@ -286,9 +286,6 @@ struct arch_domain /* NB. protected by d->event_lock and by irq_desc[irq].lock */ struct radix_tree_root irq_pirq; - int *pirq_irq; - /* pirq to emulated irq */ - int *pirq_emuirq; /* Maximum physical-address bitwidth supported by this guest. */ unsigned int physaddr_bitsize; --- 2011-04-29.orig/xen/include/asm-x86/hvm/irq.h +++ 2011-04-29/xen/include/asm-x86/hvm/irq.h @@ -111,4 +111,6 @@ struct hvm_intack hvm_vcpu_ack_pending_i */ #define SUPPORT_MSI_REMAPPING 1 +void msixtbl_pt_cleanup(struct domain *d); + #endif /* __ASM_X86_HVM_IRQ_H__ */ --- 2011-04-29.orig/xen/include/asm-x86/irq.h +++ 2011-04-29/xen/include/asm-x86/irq.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -105,6 +106,20 @@ extern unsigned int io_apic_irqs; DECLARE_PER_CPU(unsigned int, irq_count); +struct pirq; +struct arch_pirq { + int irq; + union { + struct hvm_pirq { + int emuirq; + struct hvm_pirq_dpci dpci; + } hvm; + }; +}; + +#define pirq_dpci(pirq) ((pirq) ? 
&(pirq)->arch.hvm.dpci : NULL) +#define dpci_pirq(pd) container_of(pd, struct pirq, arch.hvm.dpci) + int pirq_shared(struct domain *d , int irq); int map_domain_pirq(struct domain *d, int pirq, int irq, int type, @@ -114,7 +129,7 @@ int get_free_pirq(struct domain *d, int void free_domain_pirqs(struct domain *d); int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq); int unmap_domain_pirq_emuirq(struct domain *d, int pirq); -int hvm_domain_use_pirq(struct domain *d, int irq); +bool_t hvm_domain_use_pirq(const struct domain *, const struct pirq *); int init_irq_data(void); @@ -146,11 +161,11 @@ void irq_set_affinity(struct irq_desc *, int init_domain_irq_mapping(struct domain *); void cleanup_domain_irq_mapping(struct domain *); -#define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq]) +#define domain_pirq_to_irq(d, pirq) pirq_field(d, pirq, arch.irq) #define domain_irq_to_pirq(d, irq) \ ((long)radix_tree_lookup(&(d)->arch.irq_pirq, irq)) #define PIRQ_ALLOCATED -1 -#define domain_pirq_to_emuirq(d, pirq) ((d)->arch.pirq_emuirq[pirq]) +#define domain_pirq_to_emuirq(d, pirq) pirq_field(d, pirq, arch.hvm.emuirq) #define domain_emuirq_to_pirq(d, emuirq) \ (((long)radix_tree_lookup(&(d)->arch.hvm_domain.emuirq_pirq, emuirq) ?: \ IRQ_UNBOUND + 1) - 1) --- 2011-04-29.orig/xen/include/xen/domain.h +++ 2011-04-29/xen/include/xen/domain.h @@ -38,6 +38,12 @@ struct vcpu_guest_context *alloc_vcpu_gu void free_vcpu_guest_context(struct vcpu_guest_context *); #endif +/* Allocate/free a PIRQ structure. */ +#ifndef alloc_pirq_struct +struct pirq *alloc_pirq_struct(struct domain *); +#endif +#define free_pirq_struct xfree + /* * Initialise/destroy arch-specific details of a VCPU. 
* - vcpu_initialise() is called after the basic generic fields of the --- 2011-04-29.orig/xen/include/xen/event.h +++ 2011-04-29/xen/include/xen/event.h @@ -36,7 +36,7 @@ void send_guest_global_virq(struct domai * @pirq: Physical IRQ number * Returns TRUE if the delivery port was already pending. */ -int send_guest_pirq(struct domain *d, int pirq); +int send_guest_pirq(struct domain *, const struct pirq *); /* Send a notification from a given domain's event-channel port. */ int evtchn_send(struct domain *d, unsigned int lport); --- 2011-04-29.orig/xen/include/xen/hvm/irq.h +++ 2011-04-29/xen/include/xen/hvm/irq.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include struct dev_intx_gsi_link { @@ -38,11 +38,15 @@ struct dev_intx_gsi_link { #define _HVM_IRQ_DPCI_MACH_PCI_SHIFT 0 #define _HVM_IRQ_DPCI_MACH_MSI_SHIFT 1 +#define _HVM_IRQ_DPCI_MAPPED_SHIFT 2 +#define _HVM_IRQ_DPCI_EOI_LATCH_SHIFT 3 #define _HVM_IRQ_DPCI_GUEST_PCI_SHIFT 4 #define _HVM_IRQ_DPCI_GUEST_MSI_SHIFT 5 #define _HVM_IRQ_DPCI_TRANSLATE_SHIFT 15 #define HVM_IRQ_DPCI_MACH_PCI (1 << _HVM_IRQ_DPCI_MACH_PCI_SHIFT) #define HVM_IRQ_DPCI_MACH_MSI (1 << _HVM_IRQ_DPCI_MACH_MSI_SHIFT) +#define HVM_IRQ_DPCI_MAPPED (1 << _HVM_IRQ_DPCI_MAPPED_SHIFT) +#define HVM_IRQ_DPCI_EOI_LATCH (1 << _HVM_IRQ_DPCI_EOI_LATCH_SHIFT) #define HVM_IRQ_DPCI_GUEST_PCI (1 << _HVM_IRQ_DPCI_GUEST_PCI_SHIFT) #define HVM_IRQ_DPCI_GUEST_MSI (1 << _HVM_IRQ_DPCI_GUEST_MSI_SHIFT) #define HVM_IRQ_DPCI_TRANSLATE (1 << _HVM_IRQ_DPCI_TRANSLATE_SHIFT) @@ -63,14 +67,6 @@ struct hvm_gmsi_info { int dest_vcpu_id; /* -1 :multi-dest, non-negative: dest_vcpu_id */ }; -struct hvm_mirq_dpci_mapping { - uint32_t flags; - int pending; - struct list_head digl_list; - struct domain *dom; - struct hvm_gmsi_info gmsi; -}; - struct hvm_girq_dpci_mapping { struct list_head list; uint8_t device; @@ -88,20 +84,33 @@ struct hvm_girq_dpci_mapping { /* Protected by domain's event_lock */ struct hvm_irq_dpci { - /* Machine IRQ to guest device/intx 
mapping. */ - unsigned long *mapping; - struct hvm_mirq_dpci_mapping *mirq; - unsigned long *dirq_mask; /* Guest IRQ to guest device/intx mapping. */ struct list_head girq[NR_HVM_IRQS]; /* Record of mapped ISA IRQs */ DECLARE_BITMAP(isairq_map, NR_ISAIRQS); /* Record of mapped Links */ uint8_t link_cnt[NR_LINK]; - struct timer *hvm_timer; struct tasklet dirq_tasklet; }; +/* Machine IRQ to guest device/intx mapping. */ +struct hvm_pirq_dpci { + uint32_t flags; + bool_t masked; + uint16_t pending; + struct list_head digl_list; + struct domain *dom; + struct hvm_gmsi_info gmsi; + struct timer timer; +}; + +void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *); +bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *); +int pt_pirq_iterate(struct domain *d, + int (*cb)(struct domain *, unsigned int pirq, + struct hvm_pirq_dpci *, void *arg), + void *arg); + /* Modify state of a PCI INTx wire. */ void hvm_pci_intx_assert( struct domain *d, unsigned int device, unsigned int intx); @@ -120,4 +129,6 @@ void hvm_maybe_deassert_evtchn_irq(void) void hvm_assert_evtchn_irq(struct vcpu *v); void hvm_set_callback_via(struct domain *d, uint64_t via); +int vmsi_deliver(struct domain *, const struct hvm_pirq_dpci *); + #endif /* __XEN_HVM_IRQ_H__ */ --- 2011-04-29.orig/xen/include/xen/iommu.h +++ 2011-04-29/xen/include/xen/iommu.h @@ -88,7 +88,9 @@ int iommu_unmap_page(struct domain *d, u void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present); void iommu_set_pgd(struct domain *d); void iommu_domain_teardown(struct domain *d); -int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq); + +struct pirq; +int hvm_do_IRQ_dpci(struct domain *, struct pirq *); int dpci_ioport_intercept(ioreq_t *p); int pt_irq_create_bind_vtd(struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind); --- 2011-04-29.orig/xen/include/xen/irq.h +++ 2011-04-29/xen/include/xen/irq.h @@ -135,13 +135,41 @@ extern void no_action(int cpl, void *dev struct domain; struct vcpu; -extern int 
pirq_guest_eoi(struct domain *d, int irq); + +struct pirq { + u16 evtchn; + bool_t masked; + struct arch_pirq arch; +}; + +#define pirq_info(d, p) ((struct pirq *)radix_tree_lookup(&(d)->pirq_tree, p)) + +/* Use this instead of pirq_info() if the structure may need allocating. */ +extern struct pirq *pirq_get_info(struct domain *, int pirq); + +#define pirq_field(d, p, f) ({ \ + const struct pirq *__pi = pirq_info(d, p); \ + __pi ? __pi->f : 0; \ +}) +#define pirq_to_evtchn(d, pirq) pirq_field(d, pirq, evtchn) +#define pirq_masked(d, pirq) pirq_field(d, pirq, masked) + +void pirq_cleanup_check(struct pirq *, struct domain *, int); +/* Common check: only call the cleanup function once the PIRQ is no longer bound to an event channel (evtchn == 0). */ +#define pirq_cleanup_check(info, d, pirq) \ + ((info)->evtchn ? (void)0 : pirq_cleanup_check(info, d, pirq)) + +extern void pirq_guest_eoi(struct domain *, struct pirq *); +extern void desc_guest_eoi(struct domain *, struct irq_desc *, struct pirq *); extern int pirq_guest_unmask(struct domain *d); -extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share); -extern void pirq_guest_unbind(struct domain *d, int irq); +extern int pirq_guest_bind(struct vcpu *, int pirq, struct pirq *, + int will_share); +extern void pirq_guest_unbind(struct domain *d, int pirq, struct pirq *); extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *); extern irq_desc_t *domain_spin_lock_irq_desc( struct domain *d, int irq, unsigned long *pflags); +extern irq_desc_t *pirq_spin_lock_irq_desc( + struct domain *, const struct pirq *, unsigned long *pflags); static inline void set_native_irq_info(unsigned int irq, const cpumask_t *mask) { --- 2011-04-29.orig/xen/include/xen/pci.h +++ 2011-04-29/xen/include/xen/pci.h @@ -117,8 +117,9 @@ int pci_find_cap_offset(u8 bus, u8 dev, int pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap); int pci_find_ext_capability(int seg, int bus, int devfn, int cap); -int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable); -void msixtbl_pt_unregister(struct domain *d, int pirq);
+struct pirq; +int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable); +void msixtbl_pt_unregister(struct domain *, struct pirq *); void pci_enable_acs(struct pci_dev *pdev); #endif /* __XEN_PCI_H__ */ --- 2011-04-29.orig/xen/include/xen/radix-tree.h +++ 2011-04-29/xen/include/xen/radix-tree.h @@ -72,6 +72,7 @@ void *radix_tree_delete(struct radix_tre void(*node_free)(struct radix_tree_node *)); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items); + unsigned long first_index, unsigned int max_items, + unsigned long *indexes); #endif /* _XEN_RADIX_TREE_H */ --- 2011-04-29.orig/xen/include/xen/sched.h +++ 2011-04-29/xen/include/xen/sched.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -234,13 +235,11 @@ struct domain struct grant_table *grant_table; /* - * Interrupt to event-channel mappings. Updates should be protected by the - * domain's event-channel spinlock. Read accesses can also synchronise on - * the lock, but races don't usually matter. + * Interrupt to event-channel mappings and other per-guest-pirq data. + * Protected by the domain's event-channel spinlock. */ unsigned int nr_pirqs; - u16 *pirq_to_evtchn; - unsigned long *pirq_mask; + struct radix_tree_root pirq_tree; /* I/O capabilities (access to IRQs and memory-mapped I/O). */ struct rangeset *iomem_caps;