[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/2, v2] replace d->nr_pirqs sized arrays with radix tree


  • To: Jan Beulich <JBeulich@xxxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: Keir Fraser <keir@xxxxxxx>
  • Date: Mon, 09 May 2011 09:43:43 +0100
  • Cc: Allen M Kay <allen.m.kay@xxxxxxxxx>
  • Delivery-date: Mon, 09 May 2011 01:44:45 -0700
  • Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=sender:user-agent:date:subject:from:to:cc:message-id:thread-topic :thread-index:in-reply-to:mime-version:content-type :content-transfer-encoding; b=wCaSQnMcclu3PATc7T+KpNQ3Oyw98unW88k/ln63WxnPVq+6y5qt1fqdIWmWvv3pnQ UV0p+yPmVVW9wE3tesdhEsizpYOyrkIamTFFYRhH7vqvidNnw/LOsm/lz7/MOuE6GKF6 oXrjMHgEtRh0R9Py44aOvJ2zzYLzr7Mi88hn0=
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: AcwOJTOmERd22hx9xki7D2Zv5FQffQ==
  • Thread-topic: [Xen-devel] [PATCH 2/2, v2] replace d->nr_pirqs sized arrays with radix tree

On 03/05/2011 15:09, "Jan Beulich" <JBeulich@xxxxxxxxxx> wrote:

> With this it is questionable whether retaining struct domain's nr_pirqs
> is actually necessary - the value now only serves for bounds checking,
> and this boundary could easily be nr_irqs.
> 
> Another thing to consider is whether it's woth storing the pirq number
> in struct pirq, to avoid passing the number and a pointer to quite a
> number of functions.
> 
> Note that ia64, the build of which is broken currently anyway, is only
> partially fixed up.

Jan, I just applied a fixed-up version of your patch 1/2. I'm leaving this
one to you however as it is huge ;-), and you can work out what to do based
on how I changed your first patch before I aplied it.

Just a couple of extra thoughts on this one:
 * You will need free_pirq_struct() to do an RCU-safe free, if pirq_info()
is ever called without the pirq_tree's appropriate writer lock. I didn't
investigate deeply enough to discover whether this is the case or not. If
you do need an RCU-safe free, then of course 'struct pirq' will need to
contain an rcu_head -- not a problem imo.
 * Instead of changing radix_tree_gang_lookup(), please just add a pirq
field to 'struct pirq'. As with the rcu_head above, there's no massive need
to save bytes for this structure; furthermore I'd rather have the indexes in
the structs in the heap, rather than inflating the caller's stack.

 -- Keir

> v2: adjustments for split setup/teardown of translation data
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
> 
> --- 2011-04-29.orig/xen/arch/ia64/vmx/vmx_interrupt.c
> +++ 2011-04-29/xen/arch/ia64/vmx/vmx_interrupt.c
> @@ -155,13 +155,13 @@ void hvm_isa_irq_deassert(struct domain
>      /* dummy */
>  }
>  
> -int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
> +int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable)
>  {
>      /* dummy */
>      return -ENOSYS;
>  }
>  
> -void msixtbl_pt_unregister(struct domain *d, int pirq)
> +void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq)
>  {
>      * dummy */
>  }
> --- 2011-04-29.orig/xen/arch/ia64/xen/hypercall.c
> +++ 2011-04-29/xen/arch/ia64/xen/hypercall.c
> @@ -65,8 +65,11 @@ static long __do_pirq_guest_eoi(struct d
>  {
> if ( pirq < 0 || pirq >= NR_IRQS )
> return -EINVAL;
> - if ( d->arch.pirq_eoi_map )
> -  evtchn_unmask(d->pirq_to_evtchn[pirq]);
> + if ( d->arch.pirq_eoi_map ) {
> +  spin_lock(&d->event_lock);
> +  evtchn_unmask(pirq_to_evtchn(d, pirq));
> +  spin_unlock(&d->event_lock);
> + }
> return pirq_guest_eoi(d, pirq);
>  }
>  
> --- 2011-04-29.orig/xen/arch/ia64/xen/irq.c
> +++ 2011-04-29/xen/arch/ia64/xen/irq.c
> @@ -363,15 +363,17 @@ void __do_IRQ_guest(int irq)
>      irq_desc_t         *esc = &irq_desc[irq];
>      irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
>      struct domain      *d;
> +    struct pirq        *pirq;
>      int                 i, already_pending = 0;
>  
>      for ( i = 0; i < action->nr_guests; i++ )
>      {
>          d = action->guest[i];
> +        pirq = pirq_info(d, irq);
>          if ( (action->ack_type != ACKTYPE_NONE) &&
> -             !test_and_set_bit(irq, &d->pirq_mask) )
> +             !test_and_set_bool(pirq->masked) )
>              action->in_flight++;
> -  if ( hvm_do_IRQ_dpci(d, irq) )
> +  if ( hvm_do_IRQ_dpci(d, pirq) )
> {
> if ( action->ack_type == ACKTYPE_NONE )
> {
> @@ -379,7 +381,7 @@ void __do_IRQ_guest(int irq)
> desc->status |= IRQ_INPROGRESS; /* cleared during hvm eoi */
> }
> }
> -  else if ( send_guest_pirq(d, irq) &&
> +  else if ( send_guet_pirq(d, pirq) &&
> (action->ack_type == ACKTYPE_NONE) )
> {
> already_pending++;
> @@ -423,26 +425,23 @@ static int pirq_acktype(int irq)
>      return ACKTYPE_NONE;
>  }
>  
> -int pirq_guest_eoi(struct domain *d, int irq)
> +int pirq_guest_eoi(struct domain *d, struct pirq *pirq)
>  {
      irq_desc_t *desc;
>      irq_guest_action_t *action;
>  
> -    if ( (irq < 0) || (irq >= NR_IRQS) )
> -        return -EINVAL;
> -
>      desc = &irq_desc[irq];
>      spin_lock_irq(&desc->lock);
>      action = (irq_guest_action_t *)desc->action;
>  
>      if ( action->ack_type == ACKTYPE_NONE )
>      {
> -        ASSERT(!test_bit(irq, d->pirq_mask));
> +        ASSERT(!pirq->masked);
>          stop_timer(&irq_guest_eoi_timer[irq]);
>          _irq_guest_eoi(desc);
>      }
>  
> -    if ( test_and_clear_bit(irq, &d->pirq_mask) && (--action->in_flight == 0)
> )
> +    if ( test_and_clear_bool(pirq->masked) && (--action->in_flight == 0) )
>      {
>          ASSERT(action->ack_type == ACKTYPE_UNMASK);
>          desc->handler->end(irq);
> @@ -455,22 +454,27 @@ int pirq_guest_eoi(struct domain *d, int
>  
>  int pirq_guest_unmask(struct domain *d)
>  {
> -    int            irq;
> +    unsigned int pirq = 0, n, i;
> +    unsigned long indexes[16];
> +    struct pirq *pirqs[ARRAY_SIZE(indexes)];
>      shared_info_t *s = d->shared_info;
>  
> -    for ( irq = find_first_bit(d->pirq_mask, NR_IRQS);
> -          irq < NR_IRQS;
> -          irq = find_next_bit(d->pirq_mask, NR_IRQS, irq+1) )
> -    {
> -        if ( !test_bit(d->pirq_to_evtchn[irq], &s->evtchn_mask[0]) )
> -            pirq_guest_eoi(d, irq);
> -
> -    }
> +    do {
> +        n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
> +                                   ARRAY_SIZE(pirqs), indexes);
> +        for ( i = 0; i < n; ++i )
> +        {
> +            pirq = indexes[i];
> +            if ( pirqs[i]->masked &&
> +                 !test_bit(pirqs[i]->evtchn, &s->evtchn_mask[0]) )
> +            pirq_guest_eoi(d, pirqs[i]);
> +        }
> +    } while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
>  
>      return 0;
>  }
>  
> -int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
> +int pirq_guest_bind(struct vcpu *v, int irq, struct pirq *pirq, int
> will_share)
>  {
>      irq_desc_t         *desc = &irq_desc[irq];
>      irq_guest_action_t *action;
> @@ -554,7 +558,7 @@ int pirq_guest_bind(struct vcpu *v, int
>      return rc;
>  }
>  
> -void pirq_guest_unbind(struct domain *d, int irq)
> +void pirq_guest_unbind(struct domain *d, int irq, struct pirq *pirq)
>  {
>      irq_desc_t         *desc = &irq_desc[irq];
>      irq_guest_action_t *action;
> @@ -572,7 +576,7 @@ void pirq_guest_unbind(struct domain *d,
>      action->nr_guests--;
>  
>      if ( action->ack_type == ACKTYPE_UNMASK )
> -        if ( test_and_clear_bit(irq, &d->pirq_mask) &&
> +        if ( test_and_clear_bool(pirq->masked) &&
>               (--action->in_flight == 0) )
>              desc->handler->end(irq);
>  
> --- 2011-04-29.orig/xen/arch/x86/domain.c
> +++ 2011-04-29/xen/arch/x86/domain.c
> @@ -608,25 +608,9 @@ int arch_domain_create(struct domain *d,
>          share_xen_page_with_guest(
>              virt_to_page(d->shared_info), d, XENSHARE_writable);
>  
> -        d->arch.pirq_irq = xmalloc_array(int, d->nr_pirqs);
> -        if ( !d->arch.pirq_irq )
> -            goto fail;
> -        memset(d->arch.pirq_irq, 0,
> -               d->nr_pirqs * sizeof(*d->arch.pirq_irq));
> -
>          if ( (rc = init_domain_irq_mapping(d)) != 0 )
>              goto fail;
>  
> -        if ( is_hvm_domain(d) )
> -        {
> -            d->arch.pirq_emuirq = xmalloc_array(int, d->nr_pirqs);
> -            if ( !d->arch.pirq_emuirq )
> -                goto fail;
> -            for (i = 0; i < d->nr_pirqs; i++)
> -                d->arch.pirq_emuirq[i] = IRQ_UNBOUND;
> -        }
> -
> -
>          if ( (rc = iommu_domain_init(d)) != 0 )
>              goto fail;
>  
> @@ -660,8 +644,6 @@ int arch_domain_create(struct domain *d,
>   fail:
>      d->is_dying = DOMDYING_dead;
>      vmce_destroy_msr(d);
> -    xfree(d->arch.pirq_irq);
> -    xfree(d->arch.pirq_emuirq);
>      cleanup_domain_irq_mapping(d);
>      free_xenheap_page(d->shared_info);
>      if ( paging_initialised )
> @@ -714,8 +696,6 @@ void arch_domain_destroy(struct domain *
>  #endif
>  
>      free_xenheap_page(d->shared_info);
> -    xfree(d->arch.pirq_irq);
> -    xfree(d->arch.pirq_emuirq);
>      cleanup_domain_irq_mapping(d);
>  }
>  
> --- 2011-04-29.orig/xen/arch/x86/hvm/hvm.c
> +++ 2011-04-29/xen/arch/x86/hvm/hvm.c
> @@ -252,32 +252,36 @@ void hvm_migrate_timers(struct vcpu *v)
>      pt_migrate(v);
>  }
>  
> -void hvm_migrate_pirqs(struct vcpu *v)
> +static int hvm_migrate_pirq(struct domain *d, unsigned int pirq,
> +                            struct hvm_pirq_dpci *pirq_dpci, void *arg)
>  {
> -    int pirq, irq;
> -    struct irq_desc *desc;
> -    struct domain *d = v->domain;
> -    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
> -    
> -    if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
> -       return;
> +    struct vcpu *v = arg;
>  
> -    spin_lock(&d->event_lock);
> -    for ( pirq = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
> -          pirq < d->nr_pirqs;
> -          pirq = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, pirq + 1)
> )
> +    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
> +         (pirq_dpci->gmsi.dest_vcpu_id == v->vcpu_id) )
>      {
> -        if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) ||
> -               (hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id != v->vcpu_id) )
> -            continue;
> -        desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL);
> -        if (!desc)
> -            continue;
> -        irq = desc - irq_desc;
> -        ASSERT(MSI_IRQ(irq));
> +        struct irq_desc *desc =
> +            pirq_spin_lock_irq_desc(d, dpci_pirq(pirq_dpci), NULL);
> +
> +        if ( !desc )
> +            return 0;
> +        ASSERT(MSI_IRQ(desc - irq_desc));
>          irq_set_affinity(desc, cpumask_of(v->processor));
>          spin_unlock_irq(&desc->lock);
>      }
> +
> +    return 0;
> +}
> +
> +void hvm_migrate_pirqs(struct vcpu *v)
> +{
> +    struct domain *d = v->domain;
> +
> +    if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
> +       return;
> +
> +    spin_lock(&d->event_lock);
> +    pt_pirq_iterate(d, hvm_migrate_pirq, v);
>      spin_unlock(&d->event_lock);
>  }
>  
> @@ -501,8 +505,6 @@ int hvm_domain_initialise(struct domain
>      return rc;
>  }
>  
> -extern void msixtbl_pt_cleanup(struct domain *d);
> -
>  void hvm_domain_relinquish_resources(struct domain *d)
>  {
>      hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
> --- 2011-04-29.orig/xen/arch/x86/hvm/irq.c
> +++ 2011-04-29/xen/arch/x86/hvm/irq.c
> @@ -33,7 +33,7 @@ static void assert_irq(struct domain *d,
>      int pirq = domain_emuirq_to_pirq(d, ioapic_gsi);
>      if ( pirq != IRQ_UNBOUND )
>      {
> -        send_guest_pirq(d, pirq);
> +        send_guest_pirq(d, pirq_info(d, pirq));
>          return;
>      }
>      vioapic_irq_positive_edge(d, ioapic_gsi);
> --- 2011-04-29.orig/xen/arch/x86/hvm/vmsi.c
> +++ 2011-04-29/xen/arch/x86/hvm/vmsi.c
> @@ -65,11 +65,10 @@ static void vmsi_inj_irq(
>      }
>  }
>  
> -int vmsi_deliver(struct domain *d, int pirq)
> +int vmsi_deliver(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
>  {
> -    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
> -    uint32_t flags = hvm_irq_dpci->mirq[pirq].gmsi.gflags;
> -    int vector = hvm_irq_dpci->mirq[pirq].gmsi.gvec;
> +    uint32_t flags = pirq_dpci->gmsi.gflags;
> +    int vector = pirq_dpci->gmsi.gvec;
>      uint8_t dest = (uint8_t)flags;
>      uint8_t dest_mode = !!(flags & VMSI_DM_MASK);
>      uint8_t delivery_mode = (flags & VMSI_DELIV_MASK) >>
> GLFAGS_SHIFT_DELIV_MODE;
> @@ -82,11 +81,7 @@ int vmsi_deliver(struct domain *d, int p
>                  "vector=%x trig_mode=%x\n",
>                  dest, dest_mode, delivery_mode, vector, trig_mode);
>  
> -    if ( !( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI ) )
> -    {
> -        gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq);
> -        return 0;
> -    }
> +    ASSERT(pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI);
>  
>      switch ( delivery_mode )
>      {
> @@ -349,7 +344,7 @@ static void del_msixtbl_entry(struct msi
>      call_rcu(&entry->rcu, free_msixtbl_entry);
>  }
>  
> -int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
> +int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable)
>  {
>      struct irq_desc *irq_desc;
>      struct msi_desc *msi_desc;
> @@ -358,6 +353,7 @@ int msixtbl_pt_register(struct domain *d
>      int r = -EINVAL;
>  
>      ASSERT(spin_is_locked(&pcidevs_lock));
> +    ASSERT(spin_is_locked(&d->event_lock));
>  
>      /*
>       * xmalloc() with irq_disabled causes the failure of check_lock()
> @@ -367,7 +363,7 @@ int msixtbl_pt_register(struct domain *d
>      if ( !new_entry )
>          return -ENOMEM;
>  
> -    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
> +    irq_desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
>      if ( !irq_desc )
>      {
>          xfree(new_entry);
> @@ -404,7 +400,7 @@ out:
>      return r;
>  }
>  
> -void msixtbl_pt_unregister(struct domain *d, int pirq)
> +void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq)
>  {
>      struct irq_desc *irq_desc;
>      struct msi_desc *msi_desc;
> @@ -412,8 +408,9 @@ void msixtbl_pt_unregister(struct doman
>      struct msixtbl_entry *entry;
>  
>      ASSERT(spin_is_locked(&pcidevs_lock));
> +    ASSERT(spin_is_locked(&d->event_lock));
>  
> -    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
> +    irq_desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
>      if ( !irq_desc )
>          return;
>  
> @@ -447,7 +444,7 @@ found:
>      spin_unlock_irq(&irq_desc->lock);
>  }
>  
> -void msixtbl_pt_cleanup(struct domain *d, int pirq)
> +void msixtbl_pt_cleanup(struct domain *d)
>  {
>      struct msixtbl_entry *entry, *temp;
>      unsigned long flags;
> --- 2011-04-29.orig/xen/arch/x86/irq.c
> +++ 2011-04-29/xen/arch/x86/irq.c
> @@ -814,7 +814,7 @@ static void irq_guest_eoi_timer_fn(void
>          {
>              struct domain *d = action->guest[i];
>              unsigned int pirq = domain_irq_to_pirq(d, irq);
> -            if ( test_and_clear_bit(pirq, d->pirq_mask) )
> +            if ( test_and_clear_bool(pirq_info(d, pirq)->masked) )
>                  action->in_flight--;
>          }
>      }
> @@ -874,11 +874,12 @@ static void __do_IRQ_guest(int irq)
>  
>      for ( i = 0; i < action->nr_guests; i++ )
>      {
> -        unsigned int pirq;
> +        struct pirq *pirq;
> +
>          d = action->guest[i];
> -        pirq = domain_irq_to_pirq(d, irq);
> +        pirq = pirq_info(d, domain_irq_to_pirq(d, irq));
>          if ( (action->ack_type != ACKTYPE_NONE) &&
> -             !test_and_set_bit(pirq, d->pirq_mask) )
> +             !test_and_set_bool(pirq->masked) )
>              action->in_flight++;
>          if ( hvm_do_IRQ_dpci(d, pirq) )
>          {
> @@ -950,28 +951,71 @@ struct irq_desc *domain_spin_lock_irq_de
>      return desc;
>  }
>  
> -static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +/*
> + * Same with struct pirq already looked up, and d->event_lock already
> + * held (thus the PIRQ <-> IRQ mapping can't change under our feet).
> + */
> +struct irq_desc *pirq_spin_lock_irq_desc(
> +    struct domain *d, const struct pirq *pirq, unsigned long *pflags)
> +{
> +    int irq = pirq->arch.irq;
> +    struct irq_desc *desc;
> +    unsigned long flags;
> +
> +    ASSERT(spin_is_locked(&d->event_lock));
> +
> +    if ( irq <= 0 )
> +        return NULL;
> +
> +    desc = irq_to_desc(irq);
> +    spin_lock_irqsave(&desc->lock, flags);
> +
> +    if ( pflags )
> +        *pflags = flags;
> +
> +    ASSERT(pirq == pirq_info(d, domain_irq_to_pirq(d, irq)));
> +    ASSERT(irq == pirq->arch.irq);
> +
> +    return desc;
> +}
> +
> +static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq,
> +                                struct pirq **pinfo)
>  {
>      int err = radix_tree_insert(&d->arch.irq_pirq, irq, NULL,
>                                  NULL, NULL);
> +    struct pirq *info;
>  
> -    return err != -EEXIST ? err : 0;
> +    if ( err && err != -EEXIST )
> +        return err;
> +    info = pirq_get_info(d, pirq);
> +    if ( !info )
> +    {
> +        if ( !err )
> +            radix_tree_delete(&d->arch.irq_pirq, irq, NULL);
> +        return -ENOMEM;
> +    }
> +    *pinfo = info;
> +    return 0;
>  }
>  
> -static void set_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +static void set_domain_irq_pirq(struct domain *d, int irq, int pirq,
> +                                struct pirq *info)
>  {
>      *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void *)(long)pirq;
> -    d->arch.pirq_irq[pirq] = irq;
> +    info->arch.irq = irq;
>  }
>  
> -static void clear_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +static void clear_domain_irq_pirq(struct domain *d, int irq, struct pirq
> *pirq)
>  {
> -    d->arch.pirq_irq[pirq] = 0;
> +    pirq->arch.irq = 0;
>      *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = NULL;
>  }
>  
> -static void cleanup_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +static void cleanup_domain_irq_pirq(struct domain *d, int irq, int pirq,
> +                                    struct pirq *info)
>  {
> +    pirq_cleanup_check(info, d, pirq);
>      radix_tree_delete(&d->arch.irq_pirq, irq, NULL);
>  }
>  
> @@ -987,10 +1031,12 @@ int init_domain_irq_mapping(struct domai
>      for ( i = 1; platform_legacy_irq(i); ++i )
>          if ( !IO_APIC_IRQ(i) )
>          {
> -            err = prepare_domain_irq_pirq(d, i, i);
> +            struct pirq *info;
> +
> +            err = prepare_domain_irq_pirq(d, i, i, &info);
>              if ( err )
>                  break;
> -            set_domain_irq_pirq(d, i, i);
> +            set_domain_irq_pirq(d, i, i, info);
>          }
>  
>      return err;
> @@ -1008,6 +1054,48 @@ void cleanup_domain_irq_mapping(struct d
>                             irq_slot_free, NULL);
>  }
>  
> +struct pirq *alloc_pirq_struct(struct domain *d)
> +{
> +    size_t sz = is_hvm_domain(d) ? sizeof(struct pirq) :
> +                                   offsetof(struct pirq, arch.hvm);
> +    struct pirq *pirq = xmalloc_bytes(sz);
> +
> +    if ( pirq )
> +    {
> +        memset(pirq, 0, sz);
> +        if ( is_hvm_domain(d) )
> +        {
> +            pirq->arch.hvm.emuirq = IRQ_UNBOUND;
> +            pt_pirq_init(d, &pirq->arch.hvm.dpci);
> +        }
> +    }
> +
> +    return pirq;
> +}
> +
> +void (pirq_cleanup_check)(struct pirq *info, struct domain *d, int pirq)
> +{
> +    /*
> +     * Check whether all fields have their default values, and delete
> +     * the entry from the tree if so.
> +     *
> +     * NB: Common parts were already checked.
> +     */
> +    if ( info->arch.irq )
> +        return;
> +
> +    if ( is_hvm_domain(d) )
> +    {
> +        if ( info->arch.hvm.emuirq != IRQ_UNBOUND )> +            return;
> +        if ( !pt_pirq_cleanup_check(&info->arch.hvm.dpci) )
> +            return;
> +    }
> +
> +    if ( radix_tree_delete(&d->pirq_tree, pirq, NULL) != info )
> +        BUG();
> +}
> +
>  /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
>  static void flush_ready_eoi(void)
>  {
> @@ -1068,18 +1156,22 @@ static void set_eoi_ready(void *data)
>      flush_ready_eoi();
>  }
>  
> -static void __pirq_guest_eoi(struct domain *d, int pirq)
> +void pirq_guest_eoi(struct domain *d, struct pirq *pirq)
> +{
> +    struct irq_desc *desc;
> +
> +    ASSERT(local_irq_is_enabled());
> +    desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
> +    if ( desc )
> +        desc_guest_eoi(d, desc, pirq);
> +}
> +
> +void desc_guest_eoi(struct domain *d, struct irq_desc *desc, stuct pirq
> *pirq)
>  {
> -    struct irq_desc         *desc;
>      irq_guest_action_t *action;
>      cpumask_t           cpu_eoi_map;
>      int                 irq;
>  
> -    ASSERT(local_irq_is_enabled());
> -    desc = domain_spin_lock_irq_desc(d, pirq, NULL);
> -    if ( desc == NULL )
> -        return;
> -
>      if ( !(desc->status & IRQ_GUEST) )
>      {
>          spin_unlock_irq(&desc->lock);
> @@ -1091,12 +1183,12 @@ static void __pirq_guest_eoi(struct doma
>  
>      if ( action->ack_type == ACKTYPE_NONE )
>      {
> -        ASSERT(!test_bit(pirq, d->pirq_mask));
> +        ASSERT(!pirq->masked);
>          stop_timer(&action->eoi_timer);
>          _irq_guest_eoi(desc);
>      }
>  
> -    if ( unlikely(!test_andclear_bit(pirq, d->pirq_mask)) ||
> +    if ( unlikely(!test_and_clear_bool(pirq->masked)) ||
>           unlikely(--action->in_flight != 0) )
>      {
>          spin_unlock_irq(&desc->lock);
> @@ -1131,27 +1223,23 @@ static void __pirq_guest_eoi(struct doma
>          on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
>  }
>  
> -int pirq_guest_eoi(struct domain *d, int irq)
> -{
> -    if ( (irq < 0) || (irq >= d->nr_pirqs) )
> -        return -EINVAL;
> -
> -    __pirq_guest_eoi(d, irq);
> -
> -    return 0;
> -}
> -
>  int pirq_guest_unmask(struct domain *d)
>  {
> -    unsigned int irq, nr = d->nr_pirqs;
> +    unsigned int pirq = 0, n, i;
> +    unsigned long indexes[16];
> +    struct pirq *pirqs[ARRAY_SIZE(indexes)];
>  > -    for ( irq = find_first_bit(d->pirq_mask, nr);
> -          irq < nr;
> -          irq = find_next_bit(d->pirq_mask, nr, irq+1) )
> -    {
> -        if  !test_bit(d->pirq_to_evtchn[irq], &shared_info(d, evtchn_mask))
> )
> -            __pirq_guest_eoi(d, irq);
> -    }
> +    do {
> +        n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
> +                                   ARRAY_SIZE(pirqs), indexes);
> +        for ( i = 0; i < n; ++i )
> +        {
> +            pirq = indexes[i];
> +            if ( pirqs[i]->masked &&
> +                 !test_bit(pirqs[i]->evtchn, &shared_info(d, evtchn_mask)) )
> +                pirq_guest_eoi(d, pirqs[i]);
> +        }
> +    } while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
>  
>      return 0;
>  }
> @@ -1221,7 +1309,7 @@ int pirq_shared(struct domain *d, int pi
>      return shared;
>  }
>  
> -int pirq_guest_bind(struct vcpu *v, int pirq, int will_share)
> +int pirq_guest_bind(struct vcpu *v, int pirq, struct pirq *info, int
> will_share)
>  {
>      unsigned int        irq;
>      struct irq_desc         *desc;
> @@ -1233,7 +1321,7 @@ int pirq_guest_bind(struct vcpu *v, int
>      BUG_ON(!local_irq_is_enabled());
>  
>   retry:
> -    desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL);
> +    desc = pirq_spin_lock_irq_desc(v->domain, info, NULL);
>      if ( desc == NULL )
>      {
>          rc = -EINVAL;
> @@ -1334,7 +1422,7 @@ int pirq_guest_bind(struct vcpu *v, int
>  }
>  
>  static irq_guest_action_t *__pirq_guest_unbind(
> -    struct domain *d, int pirq, struct irq_desc *desc)
> +    struct domain *d, int pirq, struct pirq *info, struct irq_desc *desc)
>  {
>      unsigned int       irq;
>      irq_guest_action_t *action;
> @@ -1363,13 +1451,13 @@ static irq_guest_actin_t *__pirq_guest_
>      switch ( action->ack_type )
>      {
>      case ACKTYPE_UNMASK:
> -        if ( test_and_clear_bit(pirq, d->pirq_mask) &&
> +        if ( test_and_clear_bool(info->masked) &&
>               (--action->in_flight == 0) )
>              desc->handler->end(irq);
>          break;
>      case ACKTYPE_EOI:
>          /* NB. If #guests == 0 then we clear the eoi_map later on. */
> -        if ( test_and_clear_bit(pirq, d->pirq_mask) &&
> +        if ( test_and_clear_bool(info->masked) &&
>               (--action->in_flight == 0) &&
>               (action->nr_guests != 0) )
>          {
> @@ -1387,9 +1475,9 @@ static irq_guest_action_t *__pirq_guest_
>  
>      /*
>       * The guest cannot re-bind to this IRQ until this function returns. So,
> -     * when we have flushed this IRQ from pirq_mask, it should remain
> flushed.
> +     * when we have flushed this IRQ from ->masked, it should remain flushed.
>       */
> -    BUG_ON(test_bit(pirq, d->pirq_mask));
> +    BUG_ON(info->masked);
>  
>      if ( action->nr_guests != 0 )
>          return NULL;
> @@ -1427,7 +1515,7 @@ static irq_guest_action_t *__pirq_guest_
>      return action;
>  }
>  
> -void pirq_guest_unbind(struct domain *d, int pirq)
> +void pirq_guest_unbind(struct domain *d, int pirq, struct pirq *info)
>  {
>      irq_guest_action_t *oldaction = NULL;
>      struct irq_desc *desc;
> @@ -1436,19 +1524,19 @@ void pirq_guest_unbind(struct domain *d,
>      WARN_ON(!spin_is_locked(&d->event_lock));
>  
>      BUG_ON(!local_irq_is_enabled());
> -    desc = domain_spin_lock_irq_desc(d, pirq, NULL);
> +    desc = pirq_spin_lock_irq_desc(d, info, NULL);
>  
>      if ( desc == NULL )
>      {
> -        irq = -domain_pirq_to_irq(d, pirq);
> +        irq = -info->arch.irq;
>          BUG_ON(irq <= 0);
>          desc = irq_to_desc(irq);
>          spin_lock_irq(&desc->lock);
> -        clear_domain_irq_pirq(d, irq, pirq);
> +        clear_domain_irq_pirq(d, irq, info);
>      }
>      else
>      {
> -        oldaction = __pirq_guest_unbind(d, pirq, desc);
> +        oldaction = __pirq_guest_unbind(d, pirq, info, desc);
>      }
>  
>      spin_unlock_irq(&desc->lock);
> @@ -1459,10 +1547,10 @@ void pirq_guest_unbind(struct domain *d,
>          xfree(oldaction);
>      }
>      else if ( irq > 0 )
> -        cleanup_domain_irq_pirq(d, irq, pirq);
> +        cleanup_domain_irq_pirq(d, irq, pirq, info);
>  }
>  
> -static int pirq_guest_force_unbind(struct domain *d, int irq)
> +static int pirq_guest_force_unbind(struct domain *d, int irq, struct pirq
> *info)
>  {
>      struct irq_desc *desc;
>      irq_guest_action_t *action, *oldaction = NULL;
> @@ -1471,7 +1559,7 @@ static int pirq_guest_force_unbind(struc
>      WARN_ON(!spin_is_locked(&d->event_lock));
>  
>      BUG_ON(!local_irq_is_enabled());
> -    desc = domain_spin_lock_irq_desc(d, irq, NULL);
> +    desc = pirq_spin_lock_irq_desc(d, info, NULL);
>      BUG_ON(desc == NULL);
>  
>      if ( !(desc->status & IRQ_GUEST) )
> @@ -1491,7 +1579,7 @@ static int pirq_guest_force_unbind(struc
>          goto out;
>  
>      bound = 1;
> -    oldaction = __pirq_guest_unbind(d, irq, desc);
> +    oldaction = __pirq_guest_unbind(d, irq, info, desc);
>  
>   out:
>      spin_unlock_irq(&desc->lock);
> @@ -1505,6 +1593,13 @@ static int pirq_guest_force_unbind(struc
>      return bound;
>  }
>  
> +static inline bool_t is_free_pirq(const struct domain *d,
> +                                  const struct pirq *pirq)
> +{
> +    return !pirq || (!pirq->arch.irq && (!is_hvm_domain(d) ||
> +        pirq->arch.hvm.emuirq == IRQ_UNBOUND));
> +}
> +
>  int get_free_pirq(struct domain *d, int type, int index)
>  {
>      int i;
> @@ -1514,29 +1609,17 @@ int get_free_pirq(struct domain *d, int
>      if ( type == MAP_PIRQ_TYPE_GSI )
>      {
>          for ( i = 16; i < nr_irqs_gsi; i++ )
> -            if ( !d->arch.pirq_irq[i] )
> -            {
> -                if ( !is_hvm_domain(d) ||
> -                        d->arch.pirq_emuirq[i] == IRQ_UNBOUND )
> -                    break;
> -            }
> -        if ( i == nr_irqs_gsi )
> -            return -ENOSPC;
> +            if ( is_free_pirq(d, pirq_info(d, i)) )
> +                return i;
>      }
>      else
>      {
>          for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; i-- )
> -            if ( !d->arch.pirq_irq[i] )
> -            {
> -                if ( !is_hvm_domain(d) ||
> -                        d->arch.pirq_emuirq[i] == IRQ_UNBOUND )
> -                    break;
> -            }
> -        if ( i < nr_irqs_gsi )
> -            return -ENOSPC;
> +            if ( is_free_pirq(d, pirq_info(d, i)) )
> +                return i;
>      }
>  
> -    return i;
> +    return -ENOSPC;
>  }
>  
>  int map_domain_pirq(
> @@ -1544,6 +1627,7 @@ int map_domain_pirq(
>  {
>      int ret = 0;
>      int old_irq, old_pirq;
> +    struct pirq *info;
>      struct irq_desc *desc;
>      unsigned long flags;
>      struct msi_desc *msi_desc;
> @@ -1583,7 +1667,7 @@ int map_domain_pirq(
>          return ret;
>      }
>  
> -    ret = prepare_domain_irq_pirq(d, irq, pirq);
> +    ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
>      if ( ret )
>          return ret;
>  
> @@ -1608,20 +1692,20 @@ int map_domain_pirq(
>              dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
>                d->domain_id, irq);
>          desc->handler = &pci_msi_type;
> -        set_domain_irq_pirq(d, irq, pirq);
> +        set_domain_irq_pirq(d, irq, pirq, info);
>          setup_msi_irq(pdev, msi_desc, irq);
>          spin_unlock_irqrestore(&desc->lock, flags);
>      }
>      else
>      {
>          spin_lock_irqsave(&desc->lock, flags);
> -        set_domain_irq_pirq(d, irq, pirq);
> +        set_domain_irq_pirq(d, irq, pirq, info);
>          spin_unlock_irqrestore(&desc->lock, flags);
>      }
>  
>   done:
>      if ( ret )
> -        cleanup_domain_irq_pirq(d, irq, pirq);
> +        cleanup_domain_irq_pirq(d, irq, pirq, info);
>      return ret;
>  }
>  
> @@ -1632,6 +1716,7 @@ int unmap_domain_pirq(struct domain *d,
>      struct irq_desc *desc;
>      int irq, ret = 0;
>      bool_t forced_unbind;
> +    struct pirq *info;
>      struct msi_desc *msi_desc = NULL;
>  
>      if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
> @@ -1640,8 +1725,8 @@ int unmap_domain_pirq(struct domain *d,
>      ASSERT(spin_is_locked(&pcidevs_lock));
>      ASSERT(spin_is_locked(&d->event_lock));
>  
> -    irq = domain_pirq_to_irq(d, pirq);
> -    if ( irq <= 0 )
> +    info = pirq_info(d, pirq);
> +    if ( !info || (irq = info->arch.irq) <= 0 )
>      {
>          dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
>                  d->domain_id, pirq);
> @@ -1649,7 +1734,7 @@ int unmap_domain_pirq(struct domain *d,
>          goto done;
>      }
>  
> -    forced_unbind = pirq_guest_force_unbind(d, pirq);
> +    forced_unbind = pirq_guest_force_unbind(d, pirq, info);
>      if ( forced_unbind )
>          dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
>                  d->domain_id, pirq);
> @@ -1664,10 +1749,10 @@ int unmap_domain_pirq(struct domain *d,
>      BUG_ON(irq != domain_pirq_to_irq(d, pirq));
>  
>      if ( !forced_unbind )
> -        clear_domain_irq_pirq(d, irq, pirq);
> +        clear_domain_irq_pirq(d, irq, info);
>      else
>      {
> -        d->arch.pirq_irq[pirq] = -irq;
> +        info->arch.irq = -irq;
>          *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void
> *)(long)-pirq;
>      }
>  
> @@ -1676,7 +1761,7 @@ int unmap_domain_pirq(struct domain *d,
>          msi_free_irq(msi_desc);
>  
>      if ( !forced_unbind )
> -        cleanup_domain_irq_pirq(d, irq, pirq);
> +        cleanup_domain_irq_pirq(d, irq, pirq, info);
>  
>      ret = irq_deny_access(d, pirq);
>      if ( ret )
> @@ -1698,7 +1783,7 @@ void free_domain_pirqs(struct domain *d)
>      spin_lock(&d->event_lock);
>  
>      for ( i = 0; i < d->nr_pirqs; i++ )
> -        if ( d->arch.pirq_irq[i] > 0 )
> +        if ( domain_pirq_to_irq(d, i) > 0 )
>              unmap_domain_pirq(d, i);
>  
>      spin_unlock(&d->event_lock);
> @@ -1714,6 +1799,7 @@ static void dump_irqs(unsigned char key)
>      struct irq_cfg *cfg;
>      irq_guest_action_t *action;
>      struct domain *d;
> +    const struct pirq *info;
>      unsigned long flags;
>  
>      printk("Guest interrupt information:\n");
> @@ -1748,20 +1834,18 @@ static void dump_irqs(unsigned char key)
>              {
>                  d = action->guest[i];
>                  pirq = domain_irq_to_pirq(d, irq);
> +                info = pirq_info(d, pirq);
>                  printk("%u:%3d(%c%c%c%c)",
>                         d->domain_id, pirq,
> -                       (test_bit(d->pirq_to_evtchn[pirq],
> +                       (test_bit(info->evtchn,
>                                   &shared_info(d, evtchn_pending)) ?
>                          'P' : '-'),
> -                       (test_bit(d->pirq_to_evtchn[pirq] /
> -                                 BITS_PER_EVTCHN_WORD(d),
> +                       (test_bit(info->evtchn / BITS_PER_EVTCHN_WORD(d),
>                                   &vcpu_info(d->vcpu[0], evtchn_pending_sel))
> ?
>                          'S' : '-'),
> -                       (test_bit(d->pirq_to_evtchn[pirq],
> -                                 &shared_info(d, evtchn_mask)) ?
> +                       (test_bit(info->evtchn, &shared_info(d, evtchn_mask))
> ?
>                          'M' : '-'),
> -                       (test_bit(pirq, d->pirq_mask) ?
> -                        'M' : '-'));
> +                       (info->masked ? 'M' : '-'));
>                  if ( i != action->nr_guests )
>                      printk(",");
>              }
> @@ -1868,6 +1952,7 @@ void fixup_irqs(void)
>  int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq)
>  {
>      int old_emuirq = IRQ_UNBOUND, old_pirq = IRQ_UNBOUND;
> +    struct pirq *info;
>  
>      ASSERT(spin_is_locked(&d->event_lock));
>  
> @@ -1894,6 +1979,10 @@ int map_domain_emuirq_pirq(struct domain
>          return 0;
>      }
>  
> +    info = pirq_get_info(d, pirq);
> +    if ( !info )
> +        return -ENOMEM;
> +
>      /* do not store emuirq mappings for pt devices */
>      if ( emuirq != IRQ_PT )
>      {
> @@ -1909,10 +1998,11 @@ int map_domain_emuirq_pirq(struct domain
>                  (void *)((long)pirq + 1);
>              break;
>          default:
> +            pirq_cleanup_check(info, d, pirq);
>              return err;
>          }
>      }
> -    d->arch.pirq_emuirq[pirq] = emuirq;
> +    info->arch.hvm.emuirq = emuirq;
>  
>      return 0;
>  }
> @@ -1920,6 +2010,7 @@ int map_domain_emuirq_pirq(struct domain
>  int unmap_domain_pirq_emuirq(struct domain *d, int pirq)
>  {
>      int emuirq, ret = 0;
> +    struct pirq *info;
>  
>      if ( !is_hvm_domain(d) )
>          return -EINVAL;
> @@ -1938,7 +2029,12 @@ int unmap_domain_pirq_emuirq(struct doma
>          goto done;
>      }
>  
> -    d->arch.pirq_emuirq[pirq] = IRQ_UNBOUND;
> +    info = pirq_info(d, pirq);
> +    if ( info )
> +    {
> +        info->arch.hvm.emuirq = IRQ_UNBOUND;
> +        pirq_cleanup_check(info, d, pirq);
> +    }
>      if ( emuirq != IRQ_PT )
>          radix_tree_delete(&d->arch.hvm_domain.emuirq_pirq, emuirq, NULL);
>  
> @@ -1946,16 +2042,9 @@ int unmap_domain_pirq_emuirq(struct doma
>      return ret;
>  }
>  
> -int hvm_domain_use_pirq(struct domain *d, int pirq)
> +bool_t hvm_domain_use_pirq(const struct domain *d, const struct pirq *pirq)
>  {
> -    int emuirq;
> -    
> -    if ( !is_hvm_domain(d) )
> -        return 0;
> -
> -    emuirq = domain_pirq_to_emuirq(d, pirq);
> -    if ( emuirq != IRQ_UNBOUND && d->pirq_to_evtchn[pirq] != 0 )
> -        return 1;
> -    else
> -        return 0;
> +    return is_hvm_domain(d) &&
> +           pirq->arch.hvm.emuirq != IRQ_UNBOUND &&
> +           pirq->evtchn != 0;
>  }
> --- 2011-04-29.orig/xen/arch/x86/physdev.c
> +++ 2011-04-29/xen/arch/x86/physdev.c
> @@ -258,20 +258,28 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
>      {
>      case PHYSDEVOP_eoi: {
>          struct physdev_eoi eoi;
> +        struct pirq *pirq;
> +
>          ret = -EFAULT;
>          if ( copy_from_guest(&eoi, arg, 1) != 0 )
>              break;
>          ret = -EINVAL;
>          if ( eoi.irq >= v->domain->nr_pirqs )
>              break;
> +        spin_lock(&v->domain->event_lock);
> +        pirq = pirq_info(v->domain, eoi.irq);
> +        if ( !pirq ) {
> +            spin_unlock(&v->domain->event_lock);
> +            break;
> +        }
>          if ( !is_hvm_domain(v->domain) &&
>               v->domain->arch.pv_domain.pirq_eoi_map )
> -            evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
> +            evtchn_unmask(pirq->evtchn);
>          if ( !is_hvm_domain(v->domain) ||
> -             domain_pirq_to_emuirq(v->domain, eoi.irq) == IRQ_PT )
> -            ret = pirq_guest_eoi(v->domain, eoi.irq);
> -        else
> -            ret = 0;
> +             pirq->arch.hvm.emuirq == IRQ_PT )
> +            pirq_guest_eoi(v->domain, pirq);
> +        spin_unlock(&v->domain->event_lock);
> +        ret = 0;
>          break;
>      }
>  
> @@ -564,11 +572,23 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
>              break;
>  
>          spin_lock(&d->event_lock);
> -        out.pirq = get_free_pirq(d, out.type, 0);
> -        d->arch.pirq_irq[out.pirq] = PIRQ_ALLOCATED;
> +        ret = get_free_pirq(d, out.type, 0);
> +        if ( ret >= 0 )
> +        {
> +            struct pirq *info = pirq_get_info(d, ret);
> +
> +            if ( info )
> +                info->arch.irq = PIRQ_ALLOCATED;
> +            else
> +                ret = -ENOMEM;
> +        }
>          spin_unlock(&d->event_lock);
>  
> -        ret = copy_to_guest(arg, &out, 1) ? -EFAULT : 0;
> +        if ( ret >= 0 )
> +        {
> +            out.pirq = ret;
> +            ret = copy_to_guest(arg, &out, 1) ? -EFAULT : 0;
> +        }
>  
>          rcu_unlock_domain(d);
>          break;
> --- 2011-04-29.orig/xen/common/domain.c
> +++ 2011-04-29/xen/common/domain.c
> @@ -290,13 +290,7 @@ struct domain *domain_create(
>          if ( d->nr_pirqs > nr_irqs )
>              d->nr_pirqs = nr_irqs;
>  
> -        d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs);
> -        d->pirq_mask = xmalloc_array(
> -            unsigned long, BITS_TO_LONGS(d->nr_pirqs));
> -        if ( (d->pirq_to_evtchn == NULL) || (d->pirq_mask == NULL) )
> -            goto fail;
> -        memset(d->pirq_to_evtchn, 0, d->nr_pirqs * 
> sizeof(*d->pirq_to_evtchn));
> -        bitmap_zero(d->pirq_mask, d->nr_pirqs);
> +        INIT_RADIX_TREE(&d->pirq_tree, 0);
>  
>          if ( evtchn_init(d) != 0 )
>              goto fail;
> @@ -346,6 +340,7 @@ struct domain *domain_create(
>      {
>          evtchn_destroy(d);
>          evtchn_destroy_final(d);
> +        radix_tree_destroy(&d->pirq_tree, free_pirq_struct, NULL);
>      }
>      if ( init_status & INIT_rangeset )
>          rangeset_domain_destroy(d);
> @@ -353,8 +348,6 @@ struct domain *domain_create(
>          watchdog_domain_destroy(d);
>      if ( init_status & INIT_xsm )
>          xsm_free_security_domain(d);
> -    xfree(d->pirq_mask);
> -    xfree(d->pirq_to_evtchn);
>      free_cpumask_var(d->domain_dirty_cpumask);
>      free_domain_struct(d);
>      return NULL;
> @@ -680,8 +673,7 @@ static void complete_domain_destroy(stru
>  
>      evtchn_destroy_final(d);
>  
> -    xfree(d->pirq_mask);
> -    xfree(d->pirq_to_evtchn);
> +    radix_tree_destroy(&d->pirq_tree, free_pirq_struct, NULL);
>  
>      xsm_free_security_domain(d);
>      free_cpumask_var(d->domain_dirty_cpumask);
> @@ -963,6 +955,20 @@ long vm_assist(struct domain *p, unsigne
>      return -ENOSYS;
>  }
>  
> +struct pirq *pirq_get_info(struct domain *d, int pirq)
> +{
> +    struct pirq *info = pirq_info(d, pirq);
> +
> +    if ( !info && (info = alloc_pirq_struct(d)) != NULL &&
> +         radix_tree_insert(&d->pirq_tree, pirq, info, NULL, NULL) )
> +    {
> +         free_pirq_struct(info);
> +         info = NULL;
> +    }
> +
> +    return info;
> +}
> +
>  struct migrate_info {
>      long (*func)(void *data);
>      void *data;
> --- 2011-04-29.orig/xen/common/event_channel.c
> +++ 2011-04-29/xen/common/event_channel.c
> @@ -325,6 +325,7 @@ static long evtchn_bind_pirq(evtchn_bind
>      struct evtchn *chn;
>      struct domain *d = current->domain;
>      struct vcpu   *v = d->vcpu[0];
> +    struct pirq   *info;
>      int            port, pirq = bind->pirq;
>      long           rc;
>  
> @@ -336,7 +337,7 @@ static long evtchn_bind_pirq(evtchn_bind
>  
>      spin_lock(&d->event_lock);
>  
> -    if ( d->pirq_to_evtchn[pirq] != 0 )
> +    if ( pirq_to_evtchn(d, pirq) != 0 )
>          ERROR_EXIT(-EEXIST);
>  
>      if ( (port = get_free_port(d)) < 0 )
> @@ -344,14 +345,18 @@ static long evtchn_bind_pirq(evtchn_bind
>  
>      chn = evtchn_from_port(d, port);
>  
> -    d->pirq_to_evtchn[pirq] = port;
> +    info = pirq_get_info(d, pirq);
> +    if ( !info )
> +        ERROR_EXIT(-ENOMEM);
> +    info->evtchn = port;
>      rc = (!is_hvm_domain(d)
> -          ? pirq_guest_bind(
> -              v, pirq, !!(bind->flags & BIND_PIRQ__WILL_SHARE))
> +          ? pirq_guest_bind(v, pirq, info,
> +                            !!(bind->flags & BIND_PIRQ__WILL_SHARE))
>            : 0);
>      if ( rc != 0 )
>      {
> -        d->pirq_to_evtchn[pirq] = 0;
> +        info->evtchn = 0;
> +        pirq_cleanup_check(info, d, pirq);
>          goto out;
>      }
>  
> @@ -404,12 +409,18 @@ static long __evtchn_close(struct domain
>      case ECS_UNBOUND:
>          break;
>  
> -    case ECS_PIRQ:
> +    case ECS_PIRQ: {
> +        struct pirq *pirq = pirq_info(d1, chn1->u.pirq.irq);
> +
> +        if ( !pirq )
> +            break;
>          if ( !is_hvm_domain(d1) )
> -            pirq_guest_unbind(d1, chn1->u.pirq.irq);
> -        d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0;
> +            pirq_guest_unbind(d1, chn1->u.pirq.irq, pirq);
> +        pirq->evtchn = 0;
> +        pirq_cleanup_check(pirq, d1, chn1->u.pirq.irq);
>          unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
>          break;
> +    }
>  
>      case ECS_VIRQ:
>          for_each_vcpu ( d1, v )
> @@ -659,9 +670,9 @@ void send_guest_global_virq(struct domai
>      spin_unlock_irqrestore(&v->virq_lock, flags);
>  }
>  
> -int send_guest_pirq(struct domain *d, int pirq)
> +int send_guest_pirq(struct domain *d, const struct pirq *pirq)
>  {
> -    int port = d->pirq_to_evtchn[pirq];
> +    int port;
>      struct evtchn *chn;
>  
>      /*
> @@ -670,7 +681,7 @@ int send_guest_pirq(struct domain *d, in
>       * HVM guests: Port is legitimately zero when the guest disables the
>       *     emulated interrupt/evtchn.
>       */
> -    if ( port == 0 )
> +    if ( pirq == NULL || (port = pirq->evtchn) == 0 )
>      {
>          BUG_ON(!is_hvm_domain(d));
>          return 0;
> @@ -812,13 +823,10 @@ int evtchn_unmask(unsigned int port)
>      struct domain *d = current->domain;
>      struct vcpu   *v;
>  
> -    spin_lock(&d->event_lock);
> +    ASSERT(spin_is_locked(&d->event_lock));
>  
>      if ( unlikely(!port_is_valid(d, port)) )
> -    {
> -        spin_unlock(&d->event_lock);
>          return -EINVAL;
> -    }
>  
>      v = d->vcpu[evtchn_from_port(d, port)->notify_vcpu_id];
>  
> @@ -834,8 +842,6 @@ int evtchn_unmask(unsigned int port)
>          vcpu_mark_events_pending(v);
>      }
>  
> -    spin_unlock(&d->event_lock);
> -
>      return 0;
>  }
>  
> @@ -960,7 +966,9 @@ long do_event_channel_op(int cmd, XEN_GU
>          struct evtchn_unmask unmask;
>          if ( copy_from_guest(&unmask, arg, 1) != 0 )
>              return -EFAULT;
> +        spin_lock(&current->domain->event_lock);
>          rc = evtchn_unmask(unmask.port);
> +        spin_unlock(&current->domain->event_lock);
>          break;
>      }
>  
> --- 2011-04-29.orig/xen/common/radix-tree.c
> +++ 2011-04-29/xen/common/radix-tree.c
> @@ -225,7 +225,8 @@ EXPORT_SYMBOL(radix_tree_lookup);
>  
>  static unsigned int
>  __lookup(struct radix_tree_root *root, void **results, unsigned long index,
> -         unsigned int max_items, unsigned long *next_index)
> +         unsigned int max_items, unsigned long *indexes,
> +         unsigned long *next_index)
>  {
>      unsigned int nr_found = 0;
>      unsigned int shift, height;
> @@ -235,8 +236,11 @@ __lookup(struct radix_tree_root *root, v
>      height = root->height;
>      if (index > radix_tree_maxindex(height))
>          if (height == 0) {
> -            if (root->rnode && index == 0)
> +            if (root->rnode && index == 0) {
> +                if (indexes)
> +                    indexes[nr_found] = index;
>                  results[nr_found++] = root->rnode;
> +            }
>              goto out;
>          }
>  
> @@ -265,6 +269,8 @@ __lookup(struct radix_tree_root *root, v
>      for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
>          index++;
>          if (slot->slots[i]) {
> +            if (indexes)
> +                indexes[nr_found] = index - 1;
>              results[nr_found++] = slot->slots[i];
>              if (nr_found == max_items)
>                  goto out;
> @@ -281,6 +287,7 @@ __lookup(struct radix_tree_root *root, v
>   * @results: where the results of the lookup are placed
>   * @first_index: start the lookup from this key
>   * @max_items: place up to this many items at *results
> + * @indexes: (optional) array to store indexes of items.
>   *
>   * Performs an index-ascending scan of the tree for present items.  Places
>   * them at *@results and returns the number of items which were placed at
> @@ -290,7 +297,8 @@ __lookup(struct radix_tree_root *root, v
>   */
>  unsigned int
>  radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
> -                       unsigned long first_index, unsigned int max_items)
> +                       unsigned long first_index, unsigned int max_items,
> +                       unsigned long *indexes)
>  {
>      const unsigned long max_index = radix_tree_maxindex(root->height);
>      unsigned long cur_index = first_index;
> @@ -303,7 +311,7 @@ radix_tree_gang_lookup(struct radix_tree
>          if (cur_index > max_index)
>              break;
>          nr_found = __lookup(root, results + ret, cur_index,
> -                            max_items - ret, &next_index);
> +                            max_items - ret, indexes + ret, &next_index);
>          ret += nr_found;
>          if (next_index == 0)
>              break;
> --- 2011-04-29.orig/xen/drivers/passthrough/io.c
> +++ 2011-04-29/xen/drivers/passthrough/io.c
> @@ -35,18 +35,28 @@ bool_t pt_irq_need_timer(uint32_t flags)
>      return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE));
>  }
>  
> +static int pt_irq_guest_eoi(struct domain *d, unsigned int pirq,
> +                            struct hvm_pirq_dpci *pirq_dpci, void *arg)
> +{
> +    if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT,
> +                              &pirq_dpci->flags) )
> +    {
> +        pirq_dpci->masked = 0;
> +        pirq_dpci->pending = 0;
> +        pirq_guest_eoi(d, dpci_pirq(pirq_dpci));
> +    }
> +
> +    return 0;
> +}
> +
>  static void pt_irq_time_out(void *data)
>  {
> -    struct hvm_mirq_dpci_mapping *irq_map = data;
> -    unsigned int guest_gsi, machine_gsi = 0;
> +    struct hvm_pirq_dpci *irq_map = data;
> +    unsigned int guest_gsi;
>      struct hvm_irq_dpci *dpci = NULL;
>      struct dev_intx_gsi_link *digl;
>      struct hvm_girq_dpci_mapping *girq;
>      uint32_t device, intx;
> -    unsigned int nr_pirqs = irq_map->dom->nr_pirqs;
> -    DECLARE_BITMAP(machine_gsi_map, nr_pirqs);
> -
> -    bitmap_zero(machine_gsi_map, nr_pirqs);
>  
>      spin_lock(&irq_map->dom->event_lock);
>  
> @@ -57,32 +67,18 @@ static void pt_irq_time_out(void *data)
>          guest_gsi = digl->gsi;
>          list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
>          {
> -            machine_gsi = girq->machine_gsi;
> -            set_bit(machine_gsi, machine_gsi_map);
> +            struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);
> +
> +            pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
>          }
>          device = digl->device;
>          intx = digl->intx;
>          hvm_pci_intx_deassert(irq_map->dom, device, intx);
>      }
>  
> -    for ( machine_gsi = find_first_bit(machine_gsi_map, nr_pirqs);
> -          machine_gsi < nr_pirqs;
> -          machine_gsi = find_next_bit(machine_gsi_map, nr_pirqs,
> -                                      machine_gsi + 1) )
> -    {
> -        clear_bit(machine_gsi, dpci->dirq_mask);
> -        dpci->mirq[machine_gsi].pending = 0;
> -    }
> +    pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL);
>  
>      spin_unlock(&irq_map->dom->event_lock);
> -
> -    for ( machine_gsi = find_first_bit(machine_gsi_map, nr_pirqs);
> -          machine_gsi < nr_pirqs;
> -          machine_gsi = find_next_bit(machine_gsi_map, nr_pirqs,
> -                                      machine_gsi + 1) )
> -    {
> -        pirq_guest_eoi(irq_map->dom, machine_gsi);
> -    }
>  }
>  
>  struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d)
> @@ -95,10 +91,6 @@ struct hvm_irq_dpci *domain_get_irq_dpci
>  
>  void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
>  {
> -    xfree(dpci->mirq);
> -    xfree(dpci->dirq_mask);
> -    xfree(dpci->mapping);
> -    xfree(dpci->hvm_timer);
>      xfree(dpci);
>  }
>  
> @@ -106,7 +98,9 @@ int pt_irq_create_bind_vtd(
>      struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
>  {
>      struct hvm_irq_dpci *hvm_irq_dpci = NULL;
> -    uint32_t machine_gsi, guest_gsi;
> +    struct hvm_pirq_dpci *pirq_dpci;
> +    struct pirq *info;
> +    uint32_t guest_gsi;
>      uint32_t device, intx, link;
>      struct dev_intx_gsi_link *digl;
>      struct hvm_girq_dpci_mapping *girq;
> @@ -129,63 +123,45 @@ int pt_irq_create_bind_vtd(
>          memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
>          tasklet_init(&hvm_irq_dpci->dirq_tasklet,
>                       hvm_dirq_assist, (unsigned long)d);
> -        hvm_irq_dpci->mirq = xmalloc_array(struct hvm_mirq_dpci_mapping,
> -                                           d->nr_pirqs);
> -        hvm_irq_dpci->dirq_mask = xmalloc_array(unsigned long,
> -                                                BITS_TO_LONGS(d->nr_pirqs));
> -        hvm_irq_dpci->mapping = xmalloc_array(unsigned long,
> -                                              BITS_TO_LONGS(d->nr_pirqs));
> -        hvm_irq_dpci->hvm_timer = xmalloc_array(struct timer, d->nr_pirqs);
> -        if ( !hvm_irq_dpci->mirq ||
> -             !hvm_irq_dpci->dirq_mask ||
> -             !hvm_irq_dpci->mapping ||
> -             !hvm_irq_dpci->hvm_timer)
> -        {
> -            spin_unlock(&d->event_lock);
> -            free_hvm_irq_dpci(hvm_irq_dpci);
> -            return -ENOMEM;
> -        }
> -        memset(hvm_irq_dpci->mirq, 0,
> -               d->nr_pirqs * sizeof(*hvm_irq_dpci->mirq));
> -        bitmap_zero(hvm_irq_dpci->dirq_mask, d->nr_pirqs);
> -        bitmap_zero(hvm_irq_dpci->mapping, d->nr_pirqs);
> -        memset(hvm_irq_dpci->hvm_timer, 0,
> -                d->nr_pirqs * sizeof(*hvm_irq_dpci->hvm_timer));
> -        for ( int i = 0; i < d->nr_pirqs; i++ ) {
> -            INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
> -            hvm_irq_dpci->mirq[i].gmsi.dest_vcpu_id = -1;
> -        }
>          for ( int i = 0; i < NR_HVM_IRQS; i++ )
>              INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
>  
>          d->arch.hvm_domain.irq.dpci = hvm_irq_dpci;
>      }
>  
> +    info = pirq_get_info(d, pirq);
> +    if ( !info )
> +    {
> +        spin_unlock(&d->event_lock);
> +        return -ENOMEM;
> +    }
> +    pirq_dpci = pirq_dpci(info);
> +
>      if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
>      {
>          uint8_t dest, dest_mode;
>          int dest_vcpu_id;
>  
> -        if ( !test_and_set_bit(pirq, hvm_irq_dpci->mapping))
> +        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
>          {
> -            hvm_irq_dpci->mirq[pirq].flags = HVM_IRQ_DPCI_MACH_MSI |
> -                                             HVM_IRQ_DPCI_GUEST_MSI;
> -            hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
> -            hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags;
> +            pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
> +                               HVM_IRQ_DPCI_GUEST_MSI;
> +            pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
> +            pirq_dpci->gmsi.gflags = pt_irq_bind->u.msi.gflags;
>              /* bind after hvm_irq_dpci is setup to avoid race with irq 
> handler*/
> -            rc = pirq_guest_bind(d->vcpu[0], pirq, 0);
> +            rc = pirq_guest_bind(d->vcpu[0], pirq, info, 0);
>              if ( rc == 0 && pt_irq_bind->u.msi.gtable )
>              {
> -                rc = msixtbl_pt_register(d, pirq, pt_irq_bind->u.msi.gtable);
> +                rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
>                  if ( unlikely(rc) )
> -                    pirq_guest_unbind(d, pirq);
> +                    pirq_guest_unbind(d, pirq, info);
>              }
>              if ( unlikely(rc) )
>              {
> -                hvm_irq_dpci->mirq[pirq].gmsi.gflags = 0;
> -                hvm_irq_dpci->mirq[pirq].gmsi.gvec = 0;
> -                hvm_irq_dpci->mirq[pirq].flags = 0;
> -                clear_bit(pirq, hvm_irq_dpci->mapping);
> +                pirq_dpci->gmsi.gflags = 0;
> +                pirq_dpci->gmsi.gvec = 0;
> +                pirq_dpci->flags = 0;
> +                pirq_cleanup_check(info, d, pirq);
>                  spin_unlock(&d->event_lock);
>                  return rc;
>              }
> @@ -194,34 +170,33 @@ int pt_irq_create_bind_vtd(
>          {
>              uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI;
>  
> -            if ( (hvm_irq_dpci->mirq[pirq].flags & mask) != mask)
> +            if ( (pirq_dpci->flags & mask) != mask)
>              {
>            spin_unlock(&d->event_lock);
>    return -EBUSY;
>              }
>  
>              /* if pirq is already mapped as vmsi, update the guest data/addr 
> */
> -            if ( hvm_irq_dpci->mirq[pirq].gmsi.gvec != 
> pt_irq_bind->u.msi.gvec ||
> -                    hvm_irq_dpci->mirq[pirq].gmsi.gflags != 
> pt_irq_bind->u.msi.gflags) {
> +            if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec ||
> +                 pirq_dpci->gmsi.gflags != pt_irq_bind->u.msi.gflags) {
>                  /* Directly clear pending EOIs before enabling new MSI info. 
> */
> -                pirq_guest_eoi(d, pirq);
> +                pirq_guest_eoi(d, info);
>  
> -                hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
> -                hvm_irq_dpci->mirq[pirq].gmsi.gflags = 
> pt_irq_bind->u.msi.gflags;
> +                pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
> +                pirq_dpci->gmsi.gflags = pt_irq_bind->u.msi.gflags;
>              }
>          }
>          /* Caculate dest_vcpu_id for MSI-type pirq migration */
> -        dest = hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DEST_ID_MASK;
> -        dest_mode = !!(hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DM_MASK);
> +        dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK;
> +        dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
>          dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
> -        hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id = dest_vcpu_id;
> +        pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
>          spin_unlock(&d->event_lock);
>          if ( dest_vcpu_id >= 0 )
>              hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
>      }
>      else
>      {
> -        machine_gsi = pt_irq_bind->machine_irq;
>          device = pt_irq_bind->u.pci.device;
>          intx = pt_irq_bind->u.pci.intx;
>          guest_gsi = hvm_pci_intx_gsi(device, intx);
> @@ -247,50 +222,51 @@ int pt_irq_create_bind_vtd(
>          digl->intx = intx;
>          digl->gsi = guest_gsi;
>          digl->link = link;
> -        list_add_tail(&digl->list,
> -                      &hvm_irq_dpci->mirq[machine_gsi].digl_list);
> +        list_add_tail(&digl->list, &pirq_dpci->digl_list);
>  
>          girq->device = device;
>          girq->intx = intx;
> -        girq->machine_gsi = machine_gsi;
> +        girq->machine_gsi = pirq;
>          list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]);
>  
>          /* Bind the same mirq once in the same domain */
> -        if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping))
> +        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
>          {
>              unsigned int share;
>  
> -            hvm_irq_dpci->mirq[machine_gsi].dom = d;
> +            pirq_dpci->dom = d;
>              if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
>              {
> -                hvm_irq_dpci->mirq[machine_gsi].flags = 
> HVM_IRQ_DPCI_MACH_MSI 
> |
> -                                                        
> HVM_IRQ_DPCI_GUEST_PCI |
> -                                                        
> HVM_IRQ_DPCI_TRANSLATE;
> +                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
> +                                   HVM_IRQ_DPCI_MACH_MSI |
> +                                   HVM_IRQ_DPCI_GUEST_PCI |
> +                                   HVM_IRQ_DPCI_TRANSLATE;
>                  share = 0;
>              }
>              else    /* PT_IRQ_TYPE_PCI */
>              {
> -                hvm_irq_dpci->mirq[machine_gsi].flags = 
> HVM_IRQ_DPCI_MACH_PCI 
> |
> -                                                        
> HVM_IRQ_DPCI_GUEST_PCI;
> +                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
> +                                   HVM_IRQ_DPCI_MACH_PCI |
> +                                   HVM_IRQ_DPCI_GUEST_PCI;
>                  share = BIND_PIRQ__WILL_SHARE;
>              }
>  
>              /* Init timer before binding */
> -            if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
> -                init_timer(&hvm_irq_dpci->hvm_timer[machine_gsi],
> -                           pt_irq_time_out, 
> &hvm_irq_dpci->mirq[machine_gsi], 
> 0);
> +            if ( pt_irq_need_timer(pirq_dpci->flags) )
> +                init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0);
>              /* Deal with gsi for legacy devices */
> -            rc = pirq_guest_bind(d->vcpu[0], machine_gsi, share);
> +            rc = pirq_guest_bind(d->vcpu[0], pirq, info, share);
>              if ( unlikely(rc) )
>              {
> -                if ( 
> pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) 
> )
> -                    kill_timer(&hvm_irq_dpci->hvm_timer[machine_gsi]);
> -                hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
> -                clear_bit(machine_gsi, hvm_irq_dpci->mapping);
> +                if ( pt_irq_need_timer(pirq_dpci->flags) )
> +                    kill_timer(&pirq_dpci->timer);
> +                pirq_dpci->dom = NULL;
>                  list_del(&girq->list);
>                  xfree(girq);
>                  list_del(&digl->list);
>                  hvm_irq_dpci->link_cnt[link]--;
> +                pirq_dpci->flags = 0;
> +                pirq_cleanup_check(info, d, pirq);
>                  spin_unlock(&d->event_lock);
>                  xfree(digl);
>                  return rc;
> @@ -302,7 +278,7 @@ int pt_irq_create_bind_vtd(
>          if ( iommu_verbose )
>              dprintk(VTDPREFIX,
>                      "d%d: bind: m_gsi=%u g_gsi=%u device=%u intx=%u\n",
> -                    d->domain_id, machine_gsi, guest_gsi, device, intx);
> +                    d->domain_id, pirq, guest_gsi, device, intx);
>      }
>      return 0;
>  }
> @@ -311,11 +287,12 @@ int pt_irq_destroy_bind_vtd(
>      struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
>  {
>      struct hvm_irq_dpci *hvm_irq_dpci = NULL;
> +    struct hvm_pirq_dpci *pirq_dpci;
>      uint32_t machine_gsi, guest_gsi;
>      uint32_t device, intx, link;
> -    struct list_head *digl_list, *tmp;
> -    struct dev_intx_gsi_link *digl;
> +    struct dev_intx_gsi_link *digl, *tmp;
>      struct hvm_girq_dpci_mapping *girq;
> +    struct pirq *pirq;
>  
>      machine_gsi = pt_irq_bind->machine_irq;
>      device = pt_irq_bind->u.pci.device;
> @@ -350,14 +327,14 @@ int pt_irq_destroy_bind_vtd(
>          }
>      }
>  
> +    pirq = pirq_info(d, machine_gsi);
> +    pirq_dpci = pirq_dpci(pirq);
> +
>      /* clear the mirq info */
> -    if ( test_bit(machine_gsi, hvm_irq_dpci->mapping))
> +    if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
>      {
> -        list_for_each_safe ( digl_list, tmp,
> -                &hvm_irq_dpci->mirq[machine_gsi].digl_list )
> +        list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
>          {
> -            digl = list_entry(digl_list,
> -                    struct dev_intx_gsi_link, list);
>              if ( digl->device == device &&
>                   digl->intx   == intx &&
>                   digl->link   == link &&
> @@ -368,15 +345,15 @@ int pt_irq_destroy_bind_vtd(
>              }
>          }
>  
> -        if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
> +        if ( list_empty(&pirq_dpci->digl_list) )
>          {
> -            pirq_guest_unbind(d, machine_gsi);
> -            msixtbl_pt_unregister(d, machine_gsi);
> -            if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
> -                kill_timer(&hvm_irq_dpci->hvm_timer[machine_gsi]);
> -            hvm_irq_dpci->mirq[machine_gsi].dom   = NULL;
> -            hvm_irq_dpci->mirq[machine_gsi].flags = 0;
> -            clear_bit(machine_gsi, hvm_irq_dpci->mapping);
> +            pirq_guest_unbind(d, machine_gsi, pirq);
> +            msixtbl_pt_unregister(d, pirq);
> +            if ( pt_irq_need_timer(pirq_dpci->flags) )
> +                kill_timer(&pirq_dpci->timer);
> +            pirq_dpci->dom   = NULL;
> +            pirq_dpci->flags = 0;
> +            pirq_cleanup_check(pirq, d, machine_gsi);
>          }
>      }
>      spin_unlock(&d->event_lock);
> @@ -389,120 +366,156 @@ int pt_irq_destroy_bind_vtd(
>      return 0;
>  }
>  
> -int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
> +void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci)
> +{
> +    INIT_LIST_HEAD(&dpci->digl_list);
> +    dpci->gmsi.dest_vcpu_id = -1;
> +}
> +
> +bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
> +{
> +    return !dpci->flags;
> +}
> +
> +int pt_pirq_iterate(struct domain *d,
> +                    int (*cb)(struct domain *, unsigned int,
> +                              struct hvm_pirq_dpci *, void *),
> +                    void *arg)
> +{
> +    int rc = 0;
> +    unsigned int pirq = 0, n, i;
> +    unsigned long indexes[8];
> +    struct pirq *pirqs[ARRAY_SIZE(indexes)];
> +
> +    ASSERT(spin_is_locked(&d->event_lock));
> +
> +    do {
> +        n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
> +                                   ARRAY_SIZE(pirqs), indexes);
> +        for ( i = 0; i < n; ++i )
> +        {
> +            struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]);
> +
> +            pirq = indexes[i];
> +            if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
> +                rc = cb(d, pirq, pirq_dpci, arg);
> +        }
> +    } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
> +
> +    return rc;
> +}
> +
> +int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
>  {
>      struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
> +    struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq);
>  
> -    ASSERT(spin_is_locked(&irq_desc[domain_pirq_to_irq(d, mirq)].lock));
> -    if ( !iommu_enabled || !dpci || !test_bit(mirq, dpci->mapping))
> +    if ( !iommu_enabled || !dpci || !pirq_dpci ||
> +         !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
>          return 0;
>  
> -    set_bit(mirq, dpci->dirq_mask);
> +    pirq_dpci->masked = 1;
>      tasklet_schedule(&dpci->dirq_tasklet);
>      return 1;
>  }
>  
>  #ifdef SUPPORT_MSI_REMAPPING
>  /* called with d->event_lock held */
> -static void __msi_pirq_eoi(struct domain *d, int pirq)
> +static void __msi_pirq_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
>  {
> -    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
>      irq_desc_t *desc;
>  
> -    if ( ( pirq >= 0 ) && ( pirq < d->nr_pirqs ) &&
> -         test_bit(pirq, hvm_irq_dpci->mapping) &&
> -         ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) )
> +    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
> +         (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) )
>      {
> +        struct pirq *pirq = dpci_pirq(pirq_dpci);
> +
>           BUG_ON(!local_irq_is_enabled());
> -         desc = domain_spin_lock_irq_desc(d, pirq, NULL);
> +         desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
>           if ( !desc )
>              return;
>  
>           desc->status &= ~IRQ_INPROGRESS;
> -         spin_unlock_irq(&desc->lock);
> +         desc_guest_eoi(d, desc, pirq);
> +    }
> +}
>  
> -         pirq_guest_eoi(d, pirq);
> +static int _hvm_dpci_msi_eoi(struct domain *d, unsigned int pirq,
> +                             struct hvm_pirq_dpci *pirq_dpci, void *arg)
> +{
> +    int vector = (long)arg;
> +
> +    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
> +         (pirq_dpci->gmsi.gvec == vector) )
> +    {
> +        int dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK;
> +        int dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
> +
> +        if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
> +                               dest_mode) )
> +        {
> +            __msi_pirq_eoi(d, pirq_dpci);
> +            return 1;
> +        }
>      }
> +
> +    return 0;
>  }
>  
>  void hvm_dpci_msi_eoi(struct domain *d, int vector)
>  {
> -    int pirq, dest, dest_mode;
> -    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
> -
> -    if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
> +    if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
>         return;
>  
>      spin_lock(&d->event_lock);
> -    for ( pirq = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
> -          pirq < d->nr_pirqs;
> -          pirq = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, pirq + 1) 
> )
> -    {
> -        if ( (!(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI)) ||
> -                (hvm_irq_dpci->mirq[pirq].gmsi.gvec != vector) )
> -            continue;
> -
> -        dest = hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DEST_ID_MASK;
> -        dest_mode = !!(hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DM_MASK);
> -        if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest, 
> dest_mode) )
> -            break;
> -    }
> -    if ( pirq < d->nr_pirqs )
> -        __msi_pirq_eoi(d, pirq);
> +    pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
>      spin_unlock(&d->event_lock);
>  }
>  
> -extern int vmsi_deliver(struct domain *d, int pirq);
> -static int hvm_pci_msi_assert(struct domain *d, int pirq)
> +static int hvm_pci_msi_assert(struct domain *d,
> +                              struct hvm_pirq_dpci *pirq_dpci)
>  {
> +    struct pirq *pirq = dpci_pirq(pirq_dpci);
> +
>      if ( hvm_domain_use_pirq(d, pirq) )
>          return send_guest_pirq(d, pirq);
>      else
> -        return vmsi_deliver(d, pirq);
> +        return vmsi_deliver(d, pirq_dpci);
>  }
>  #endif
>  
> -static void hvm_dirq_assist(unsigned long _d)
> +static int _hvm_dirq_assist(struct domain *d, unsigned int pirq,
> +                            struct hvm_pirq_dpci *pirq_dpci, void *arg)
>  {
> -    unsigned int pirq;
>      uint32_t device, intx;
> -    struct domain *d = (struct domain *)_d;
> -    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
>      struct dev_intx_gsi_link *digl;
>  
> -    ASSERT(hvm_irq_dpci);
> -
> -    for ( pirq = find_first_bit(hvm_irq_dpci->dirq_mask, d->nr_pirqs);
> -          pirq < d->nr_pirqs;
> -          pirq = find_next_bit(hvm_irq_dpci->dirq_mask, d->nr_pirqs, pirq + 
> 1) )
> +    if ( test_and_clear_bool(pirq_dpci->masked) )
>      {
> -        if ( !test_and_clear_bit(pirq, hvm_irq_dpci->dirq_mask) )
> -            continue;
> -
> -        spin_lock(&d->event_lock);
>  #ifdef SUPPORT_MSI_REMAPPING
> -        if ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI )
> +        if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
>          {
> -            hvm_pci_msi_assert(d, pirq);
> -            spin_unlock(&d->event_lock);
> -            continue;
> +            hvm_pci_msi_assert(d, pirq_dpci);
> +            return 0;
>          }
>  #endif
> -        list_for_each_entry ( digl, &hvm_irq_dpci->mirq[pirq].digl_list, 
> list 
> )
> +        list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
>          {
> +            struct pirq *info = dpci_pirq(pirq_dpci);
> +
>              device = digl->device;
>              intx = digl->intx;
> -            if ( hvm_domain_use_pirq(d, pirq) )
> -                send_guest_pirq(d, pirq);
> +            if ( hvm_domain_use_pirq(d, info) )
> +                send_guest_pirq(d, info);
>              else
>                  hvm_pci_intx_assert(d, device, intx);
> -            hvm_irq_dpci->mirq[pirq].pending++;
> +            pirq_dpci->pending++;
>  
>  #ifdef SUPPORT_MSI_REMAPPING
> -            if ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_TRANSLATE )
> +            if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
>              {
>                  /* for translated MSI to INTx interrupt, eoi as early as 
> possible */
> -                __msi_pirq_eoi(d, pirq);
> +                __msi_pirq_eoi(d, pirq_dpci);
>              }
>  #endif
>          }
> @@ -514,37 +527,50 @@ static void hvm_dirq_assist(unsigned lon
>           * guest will never deal with the irq, then the physical interrupt 
> line
>           * will never be deasserted.
>           */
> -        if ( pt_irq_need_timer(hvm_irq_dpci->mirq[pirq].flags) )
> -            set_timer(&hvm_irq_dpci->hvm_timer[pirq],
> -                      NOW() + PT_IRQ_TIME_OUT);
> -        spin_unlock(&d->event_lock);
> +        if ( pt_irq_need_timer(pirq_dpci->flags) )
> +            set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
>      }
> +
> +    return 0;
> +}
> +
> +static void hvm_dirq_assist(unsigned long _d)
> +{
> +    struct domain *d = (struct domain *)_d;
> +
> +    ASSERT(d->arch.hvm_domain.irq.dpci);
> +
> +    spin_lock(&d->event_lock);
> +    pt_pirq_iterate(d, _hvm_dirq_assist, NULL);
> +    spin_unlock(&d->event_lock);
>  }
>  
>  static void __hvm_dpci_eoi(struct domain *d,
> -                           struct hvm_irq_dpci *hvm_irq_dpci,
>                             struct hvm_girq_dpci_mapping *girq,
>                             union vioapic_redir_entry *ent)
>  {
> -    uint32_t device, intx, machine_gsi;
> +    uint32_t device, intx;
> +    struct pirq *pirq;
> +    struct hvm_pirq_dpci *pirq_dpci;
>  
>      device = girq->device;
>      intx = girq->intx;
>      hvm_pci_intx_deassert(d, device, intx);
>  
> -    machine_gsi = girq->machine_gsi;
> +    pirq = pirq_info(d, girq->machine_gsi);
> +    pirq_dpci = pirq_dpci(pirq);
>  
>      /*
>       * No need to get vector lock for timer
>       * since interrupt is still not EOIed
>       */
> -    if ( --hvm_irq_dpci->mirq[machine_gsi].pending ||
> +    if ( --pirq_dpci->pending ||
>           ( ent && ent->fields.mask ) ||
> -         ! pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
> +         ! pt_irq_need_timer(pirq_dpci->flags) )
>          return;
>  
> -    stop_timer(&hvm_irq_dpci->hvm_timer[machine_gsi]);
> -    pirq_guest_eoi(d, machine_gsi);
> +    stop_timer(&pirq_dpci->timer);
> +    pirq_guest_eoi(d, pirq);
>  }
>  
>  void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
> @@ -569,7 +595,7 @@ void hvm_dpci_eoi(struct domain *d, unsi
>          goto unlock;
>  
>      list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
> -        __hvm_dpci_eoi(d, hvm_irq_dpci, girq, ent);
> +        __hvm_dpci_eoi(d, girq, ent);
>  
>  unlock:
>      spin_unlock(&d->event_lock);
> --- 2011-04-29.orig/xen/drivers/passthrough/pci.c
> +++ 2011-04-29/xen/drivers/passthrough/pci.c
> @@ -236,12 +236,28 @@ out:
>      return ret;
>  }
>  
> +static int pci_clean_dpci_irq(struct domain *d, unsigned int pirq,
> +                              struct hvm_pirq_dpci *pirq_dpci, void *arg)
> +{
> +    struct dev_intx_gsi_link *digl, *tmp;
> +
> +    pirq_guest_unbind(d, pirq, dpci_pirq(pirq_dpci));
> +
> +    if ( pt_irq_need_timer(pirq_dpci->flags) )
> +        kill_timer(&pirq_dpci->timer);
> +
> +    list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
> +    {
> +        list_del(&digl->list);
> +        xfree(digl);
> +    }
> +
> +    return 0;
> +}
> +
>  static void pci_clean_dpci_irqs(struct domain *d)
>  {
>      struct hvm_irq_dpci *hvm_irq_dpci = NULL;
> -    uint32_t i;
> -    struct list_head *digl_list, *tmp;
> -    struct dev_intx_gsi_link *digl;
>  
>      if ( !iommu_enabled )
>          return;
> @@ -255,24 +271,7 @@ static void pci_clean_dpci_irqs(struct d
>      {
>          tasklet_kill(&hvm_irq_dpci->dirq_tasklet);
>  
> -        for ( i = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
> -              i < d->nr_pirqs;
> -              i = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, i + 1) )
> -        {
> -            pirq_guest_unbind(d, i);
> -
> -            if ( pt_irq_need_timer(hvm_irq_dpci->mirq[i].flags) )
> -                kill_timer(&hvm_irq_dpci->hvm_timer[i]);
> -
> -            list_for_each_safe ( digl_list, tmp,
> -                                 &hvm_irq_dpci->mirq[i].digl_list )
> -            {
> -                digl = list_entry(digl_list,
> -                                  struct dev_intx_gsi_link, list);
> -                list_del(&digl->list);
> -                xfree(digl);
> -            }
> -        }
> +        pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);
>  
>          d->arch.hvm_domain.irq.dpci = NULL;
>          free_hvm_irq_dpci(hvm_irq_dpci);
> --- 2011-04-29.orig/xen/drivers/passthrough/vtd/x86/vtd.c
> +++ 2011-04-29/xen/drivers/passthrough/vtd/x86/vtd.c
> @@ -68,12 +68,32 @@ void *__init map_to_nocache_virt(int nr_
>      return (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
>  }
>  
> -void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq)
> +static int _hvm_dpci_isairq_eoi(struct domain *d, unsigned int pirq,
> +                                struct hvm_pirq_dpci *pirq_dpci, void *arg)
>  {
>      struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
> -    struct hvm_irq_dpci *dpci = NULL;
> +    unsigned int isairq = (long)arg;
>      struct dev_intx_gsi_link *digl, *tmp;
> -    int i;
> +
> +    list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
> +    {
> +        if ( hvm_irq->pci_link.route[digl->link] == isairq )
> +        {
> +            hvm_pci_intx_deassert(d, digl->device, digl->intx);
> +            if ( --pirq_dpci->pending == 0 )
> +            {
> +                stop_timer(&pirq_dpci->timer);
> +                pirq_guest_eoi(d, dpci_pirq(pirq_dpci));
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq)
> +{
> +    struct hvm_irq_dpci *dpci = NULL;
>  
>      ASSERT(isairq < NR_ISAIRQS);
>      if ( !iommu_enabled)
> @@ -83,29 +103,10 @@ void hvm_dpci_isairq_eoi(struct domain *
>  
>      dpci = domain_get_irq_dpci(d);
>  
> -    if ( !dpci || !test_bit(isairq, dpci->isairq_map) )
> +    if ( dpci && test_bit(isairq, dpci->isairq_map) )
>      {
> -        spin_unlock(&d->event_lock);
> -        return;
> -    }
> -    /* Multiple mirq may be mapped to one isa irq */
> -    for ( i = find_first_bit(dpci->mapping, d->nr_pirqs);
> -          i < d->nr_pirqs;
> -          i = find_next_bit(dpci->mapping, d->nr_pirqs, i + 1) )
> -    {
> -        list_for_each_entry_safe ( digl, tmp,
> -            &dpci->mirq[i].digl_list, list )
> -        {
> -            if ( hvm_irq->pci_link.route[digl->link] == isairq )
> -            {
> -                hvm_pci_intx_deassert(d, digl->device, digl->intx);
> -                if ( --dpci->mirq[i].pending == 0 )
> -                {
> -                    stop_timer(&dpci->hvm_timer[i]);
> -                    pirq_guest_eoi(d, i);
> -                }
> -            }
> -        }
> +        /* Multiple mirq may be mapped to one isa irq */
> +        pt_pirq_iterate(d, _hvm_dpci_isairq_eoi, (void *)(long)isairq);
>      }
>      spin_unlock(&d->event_lock);
>  }
> --- 2011-04-29.orig/xen/include/asm-ia64/domain.h
> +++ 2011-04-29/xen/include/asm-ia64/domain.h
> @@ -11,6 +11,7 @@
>  #include <xen/list.h>
>  #include <xen/cpumask.h>
>  #include <xen/mm.h>
> +#include <xen/hvm/irq.h>
>  #include <asm/fpswa.h>
>  #include <xen/rangeset.h>
>  
> @@ -316,6 +317,23 @@ struct arch_vcpu {
>      cpumask_t cache_coherent_map;
>  };
>  
> +struct arch_pirq {
> +    struct hvm_pirq_dpci dpci;
> +};
> +
> +#define pirq_dpci(pirq) ((pirq) ? &(pirq)->arch.dpci : NULL)
> +#define dpci_pirq(dpci) container_of(dpci, struct pirq, arch.dpci)
> +
> +#define alloc_pirq_struct(d) ({ \
> +    struct pirq *pirq = xmalloc(struct pirq); \
> +    if ( pirq ) \
> +    { \
> +        memset(pirq, 0, sizeof(*pirq)); \
> +        pt_pirq_init(d, &pirq->arch.dpci); \
> +    } \
> +    pirq; \
> +})
> +
>  #include <asm/uaccess.h> /* for KERNEL_DS */
>  #include <asm/pgtable.h>
>  
> --- 2011-04-29.orig/xen/include/asm-x86/domain.h
> +++ 2011-04-29/xen/include/asm-x86/domain.h
> @@ -286,9 +286,6 @@ struct arch_domain
>  
>      /* NB. protected by d->event_lock and by irq_desc[irq].lock */
>      struct radix_tree_root irq_pirq;
> -    int *pirq_irq;
> -    /* pirq to emulated irq */
> -    int *pirq_emuirq;
>  
>      /* Maximum physical-address bitwidth supported by this guest. */
>      unsigned int physaddr_bitsize;
> --- 2011-04-29.orig/xen/include/asm-x86/hvm/irq.h
> +++ 2011-04-29/xen/include/asm-x86/hvm/irq.h
> @@ -111,4 +111,6 @@ struct hvm_intack hvm_vcpu_ack_pending_i
>   */
>  #define SUPPORT_MSI_REMAPPING 1
>  
> +void msixtbl_pt_cleanup(struct domain *d);
> +
>  #endif /* __ASM_X86_HVM_IRQ_H__ */
> --- 2011-04-29.orig/xen/include/asm-x86/irq.h
> +++ 2011-04-29/xen/include/asm-x86/irq.h
> @@ -7,6 +7,7 @@
>  #include <asm/atomic.h>
>  #include <xen/cpumask.h>
>  #include <xen/smp.h>
> +#include <xen/hvm/irq.h>
>  #include <irq_vectors.h>
>  #include <asm/percpu.h>
>  
> @@ -105,6 +106,20 @@ extern unsigned int io_apic_irqs;
>  
>  DECLARE_PER_CPU(unsigned int, irq_count);
>  
> +struct pirq;
> +struct arch_pirq {
> +    int irq;
> +    union {
> +        struct hvm_pirq {
> +            int emuirq;
> +            struct hvm_pirq_dpci dpci;
> +        } hvm;
> +    };
> +};
> +
> +#define pirq_dpci(pirq) ((pirq) ? &(pirq)->arch.hvm.dpci : NULL)
> +#define dpci_pirq(pd) container_of(pd, struct pirq, arch.hvm.dpci)
> +
>  int pirq_shared(struct domain *d , int irq);
>  
>  int map_domain_pirq(struct domain *d, int pirq, int irq, int type,
> @@ -114,7 +129,7 @@ int get_free_pirq(struct domain *d, int 
>  void free_domain_pirqs(struct domain *d);
>  int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
>  int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
> -int hvm_domain_use_pirq(struct domain *d, int irq);
> +bool_t hvm_domain_use_pirq(const struct domain *, const struct pirq *);
>  
>  int  init_irq_data(void);
>  
> @@ -146,11 +161,11 @@ void irq_set_affinity(struct irq_desc *,
>  int init_domain_irq_mapping(struct domain *);
>  void cleanup_domain_irq_mapping(struct domain *);
>  
> -#define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
> +#define domain_pirq_to_irq(d, pirq) pirq_field(d, pirq, arch.irq)
>  #define domain_irq_to_pirq(d, irq) \
>      ((long)radix_tree_lookup(&(d)->arch.irq_pirq, irq))
>  #define PIRQ_ALLOCATED -1
> -#define domain_pirq_to_emuirq(d, pirq) ((d)->arch.pirq_emuirq[pirq])
> +#define domain_pirq_to_emuirq(d, pirq) pirq_field(d, pirq, arch.hvm.emuirq)
>  #define domain_emuirq_to_pirq(d, emuirq) \
>      (((long)radix_tree_lookup(&(d)->arch.hvm_domain.emuirq_pirq, emuirq) ?: \
>       IRQ_UNBOUND + 1) - 1)
> --- 2011-04-29.orig/xen/include/xen/domain.h
> +++ 2011-04-29/xen/include/xen/domain.h
> @@ -38,6 +38,12 @@ struct vcpu_guest_context *alloc_vcpu_gu
>  void free_vcpu_guest_context(struct vcpu_guest_context *);
>  #endif
>  
> +/* Allocate/free a PIRQ structure. */
> +#ifndef alloc_pirq_struct
> +struct pirq *alloc_pirq_struct(struct domain *);
> +#endif
> +#define free_pirq_struct xfree
> +
>  /*
>   * Initialise/destroy arch-specific details of a VCPU.
>   *  - vcpu_initialise() is called after the basic generic fields of the
> --- 2011-04-29.orig/xen/include/xen/event.h
> +++ 2011-04-29/xen/include/xen/event.h
> @@ -36,7 +36,7 @@ void send_guest_global_virq(struct domai
>   *  @pirq:     Physical IRQ number
>   * Returns TRUE if the delivery port was already pending.
>   */
> -int send_guest_pirq(struct domain *d, int pirq);
> +int send_guest_pirq(struct domain *, const struct pirq *);
>  
>  /* Send a notification from a given domain's event-channel port. */
>  int evtchn_send(struct domain *d, unsigned int lport);
> --- 2011-04-29.orig/xen/include/xen/hvm/irq.h
> +++ 2011-04-29/xen/include/xen/hvm/irq.h
> @@ -25,7 +25,7 @@
>  #include <xen/types.h>
>  #include <xen/spinlock.h>
>  #include <xen/tasklet.h>
> -#include <asm/irq.h>
> +#include <xen/timer.h>
>  #include <public/hvm/save.h>
>  
>  struct dev_intx_gsi_link {
> @@ -38,11 +38,15 @@ struct dev_intx_gsi_link {
>  
>  #define _HVM_IRQ_DPCI_MACH_PCI_SHIFT            0
>  #define _HVM_IRQ_DPCI_MACH_MSI_SHIFT            1
> +#define _HVM_IRQ_DPCI_MAPPED_SHIFT              2
> +#define _HVM_IRQ_DPCI_EOI_LATCH_SHIFT           3
>  #define _HVM_IRQ_DPCI_GUEST_PCI_SHIFT           4
>  #define _HVM_IRQ_DPCI_GUEST_MSI_SHIFT           5
>  #define _HVM_IRQ_DPCI_TRANSLATE_SHIFT          15
>  #define HVM_IRQ_DPCI_MACH_PCI        (1 << _HVM_IRQ_DPCI_MACH_PCI_SHIFT)
>  #define HVM_IRQ_DPCI_MACH_MSI        (1 << _HVM_IRQ_DPCI_MACH_MSI_SHIFT)
> +#define HVM_IRQ_DPCI_MAPPED          (1 << _HVM_IRQ_DPCI_MAPPED_SHIFT)
> +#define HVM_IRQ_DPCI_EOI_LATCH       (1 << _HVM_IRQ_DPCI_EOI_LATCH_SHIFT)
>  #define HVM_IRQ_DPCI_GUEST_PCI       (1 << _HVM_IRQ_DPCI_GUEST_PCI_SHIFT)
>  #define HVM_IRQ_DPCI_GUEST_MSI       (1 << _HVM_IRQ_DPCI_GUEST_MSI_SHIFT)
>  #define HVM_IRQ_DPCI_TRANSLATE       (1 << _HVM_IRQ_DPCI_TRANSLATE_SHIFT)
> @@ -63,14 +67,6 @@ struct hvm_gmsi_info {
>      int dest_vcpu_id; /* -1 :multi-dest, non-negative: dest_vcpu_id */
>  };
>  
> -struct hvm_mirq_dpci_mapping {
> -    uint32_t flags;
> -    int pending;
> -    struct list_head digl_list;
> -    struct domain *dom;
> -    struct hvm_gmsi_info gmsi;
> -};
> -
>  struct hvm_girq_dpci_mapping {
>      struct list_head list;
>      uint8_t device;
> @@ -88,20 +84,33 @@ struct hvm_girq_dpci_mapping {
>  
>  /* Protected by domain's event_lock */
>  struct hvm_irq_dpci {
> -    /* Machine IRQ to guest device/intx mapping. */
> -    unsigned long *mapping;
> -    struct hvm_mirq_dpci_mapping *mirq;
> -    unsigned long *dirq_mask;
>      /* Guest IRQ to guest device/intx mapping. */
>      struct list_head girq[NR_HVM_IRQS];
>      /* Record of mapped ISA IRQs */
>      DECLARE_BITMAP(isairq_map, NR_ISAIRQS);
>      /* Record of mapped Links */
>      uint8_t link_cnt[NR_LINK];
> -    struct timer *hvm_timer;
>      struct tasklet dirq_tasklet;
>  };
>  
> +/* Machine IRQ to guest device/intx mapping. */
> +struct hvm_pirq_dpci {
> +    uint32_t flags;
> +    bool_t masked;
> +    uint16_t pending;
> +    struct list_head digl_list;
> +    struct domain *dom;
> +    struct hvm_gmsi_info gmsi;
> +    struct timer timer;
> +};
> +
> +void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *);
> +bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *);
> +int pt_pirq_iterate(struct domain *d,
> +                    int (*cb)(struct domain *, unsigned int pirq,
> +                              struct hvm_pirq_dpci *, void *arg),
> +                    void *arg);
> +
>  /* Modify state of a PCI INTx wire. */
>  void hvm_pci_intx_assert(
>      struct domain *d, unsigned int device, unsigned int intx);
> @@ -120,4 +129,6 @@ void hvm_maybe_deassert_evtchn_irq(void)
>  void hvm_assert_evtchn_irq(struct vcpu *v);
>  void hvm_set_callback_via(struct domain *d, uint64_t via);
>  
> +int vmsi_deliver(struct domain *, const struct hvm_pirq_dpci *);
> +
>  #endif /* __XEN_HVM_IRQ_H__ */
> --- 2011-04-29.orig/xen/include/xen/iommu.h
> +++ 2011-04-29/xen/include/xen/iommu.h
> @@ -88,7 +88,9 @@ int iommu_unmap_page(struct domain *d, u
>  void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int 
> present);
>  void iommu_set_pgd(struct domain *d);
>  void iommu_domain_teardown(struct domain *d);
> -int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
> +
> +struct pirq;
> +int hvm_do_IRQ_dpci(struct domain *, struct pirq *);
>  int dpci_ioport_intercept(ioreq_t *p);
>  int pt_irq_create_bind_vtd(struct domain *d,
>                             xen_domctl_bind_pt_irq_t *pt_irq_bind);
> --- 2011-04-29.orig/xen/include/xen/irq.h
> +++ 2011-04-29/xen/include/xen/irq.h
> @@ -135,13 +135,41 @@ extern void no_action(int cpl, void *dev
>  
>  struct domain;
>  struct vcpu;
> -extern int pirq_guest_eoi(struct domain *d, int irq);
> +
> +struct pirq {
> +    u16 evtchn;
> +    bool_t masked;
> +    struct arch_pirq arch;
> +};
> +
> +#define pirq_info(d, p) ((struct pirq *)radix_tree_lookup(&(d)->pirq_tree, 
> p))
> +
> +/* Use this instead of pirq_info() if the structure may need allocating. */
> +extern struct pirq *pirq_get_info(struct domain *, int pirq);
> +
> +#define pirq_field(d, p, f) ({ \
> +    const struct pirq *__pi = pirq_info(d, p); \
> +    __pi ? __pi->f : 0; \
> +})
> +#define pirq_to_evtchn(d, pirq) pirq_field(d, pirq, evtchn)
> +#define pirq_masked(d, pirq) pirq_field(d, pirq, masked)
> +
> +void pirq_cleanup_check(struct pirq *, struct domain *, int);
> +
> +#define pirq_cleanup_check(info, d, pirq) \
> +    ((info)->evtchn ? pirq_cleanup_check(info, d, pirq) : (void)0)
> +
> +extern void pirq_guest_eoi(struct domain *, struct pirq *);
> +extern void desc_guest_eoi(struct domain *, struct irq_desc *, struct pirq 
> *);
>  extern int pirq_guest_unmask(struct domain *d);
> -extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
> -extern void pirq_guest_unbind(struct domain *d, int irq);
> +extern int pirq_guest_bind(struct vcpu *, int pirq, struct pirq *,
> +    int will_share);
> +extern void pirq_guest_unbind(struct domain *d, int pirq, struct pirq *);
>  extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *);
>  extern irq_desc_t *domain_spin_lock_irq_desc(
>      struct domain *d, int irq, unsigned long *pflags);
> +extern irq_desc_t *pirq_spin_lock_irq_desc(
> +    struct domain *, const struct pirq *, unsigned long *pflags);
>  
>  static inline void set_native_irq_info(unsigned int irq, const cpumask_t 
> *mask)
>  {
> --- 2011-04-29.orig/xen/include/xen/pci.h
> +++ 2011-04-29/xen/include/xen/pci.h
> @@ -117,8 +117,9 @@ int pci_find_cap_offset(u8 bus, u8 dev, 
>  int pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap);
>  int pci_find_ext_capability(int seg, int bus, int devfn, int cap);
>  
> -int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable);
> -void msixtbl_pt_unregister(struct domain *d, int pirq);
> +struct pirq;
> +int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable);
> +void msixtbl_pt_unregister(struct domain *, struct pirq *);
>  void pci_enable_acs(struct pci_dev *pdev);
>  
>  #endif /* __XEN_PCI_H__ */
> --- 2011-04-29.orig/xen/include/xen/radix-tree.h
> +++ 2011-04-29/xen/include/xen/radix-tree.h
> @@ -72,6 +72,7 @@ void *radix_tree_delete(struct radix_tre
>                          void(*node_free)(struct radix_tree_node *));
>  unsigned int
>  radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
> -                       unsigned long first_index, unsigned int max_items);
> +                       unsigned long first_index, unsigned int max_items,
> +                       unsigned long *indexes);
>  
>  #endif /* _XEN_RADIX_TREE_H */
> --- 2011-04-29.orig/xen/include/xen/sched.h
> +++ 2011-04-29/xen/include/xen/sched.h
> @@ -21,6 +21,7 @@
>  #include <xen/irq.h>
>  #include <xen/mm.h>
>  #include <xen/tasklet.h>
> +#include <xen/radix-tree.h>
>  #include <public/mem_event.h>
>  #include <xen/cpumask.h>
>  #include <xen/nodemask.h>
> @@ -234,13 +235,11 @@ struct domain
>      struct grant_table *grant_table;
>  
>      /*
> -     * Interrupt to event-channel mappings. Updates should be protected by 
> the 
> -     * domain's event-channel spinlock. Read accesses can also synchronise 
> on 
> -     * the lock, but races don't usually matter.
> +     * Interrupt to event-channel mappings and other per-guest-pirq data.
> +     * Protected by the domain's event-channel spinlock.
>       */
>      unsigned int     nr_pirqs;
> -    u16             *pirq_to_evtchn;
> -    unsigned long   *pirq_mask;
> +    struct radix_tree_root pirq_tree;
>  
>      /* I/O capabilities (access to IRQs and memory-mapped I/O). */
>      struct rangeset *iomem_caps;
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.