[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees


  • To: Jan Beulich <JBeulich@xxxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: Keir Fraser <keir@xxxxxxx>
  • Date: Tue, 03 May 2011 22:08:34 +0100
  • Cc: Allen M Kay <allen.m.kay@xxxxxxxxx>
  • Delivery-date: Tue, 03 May 2011 14:09:25 -0700
  • Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=sender:user-agent:date:subject:from:to:cc:message-id:thread-topic :thread-index:in-reply-to:mime-version:content-type :content-transfer-encoding; b=SsegtLfHTSIpiGxxMYjP331Hps9Me5okKMqPF8+YeK/9aPzXrh2bLta4a2GesMSQoH BTmA/e3XX4Xp5KUtcQGh0Ib94I06dR4UrnzL6CC/ho2+zjcA75MzwlZzf039lvMHq75X 80V5dSx5/4zx5FESslvY9Fin0U6UUtwbARehA=
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: AcwJ1kMUJMmam3PrMEC8xfc6rwciwA==
  • Thread-topic: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees

On 03/05/2011 15:08, "Jan Beulich" <JBeulich@xxxxxxxxxx> wrote:

> It would seem possible to fold the two trees into one (making e.g. the
> emuirq bits stored in the upper half of the pointer), but I'm not
> certain that's worth it as it would make deletion of entries more
> cumbersome. Unless pirq-s and emuirq-s were mutually exclusive...
> 
> v2: Split setup/teardown into two stages - (de-)allocation (tree node
> (de-)population) is done with just d->event_lock held (and hence
> interrupts enabled), while actual insertion/removal of translation data
> gets done with irq_desc's lock held (and interrupts disabled).

This is mostly okay, because the only operations that occur with irqs
disabled are read-only on the radix-rtree structure itself, hence no
alloc/dealloc will happen. *However* those calls to
radix_tree_lookup[_slot]() are not synchronised wrt your calls to
radix_tree_{insert,delete}(). The latter hold d->event_lock, while the
former do not. Hence you need RCU and you need a new first patch in your
patch set to pull in a modern radix-tree.[ch] from upstream Linux.

 -- keir

> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
> 
> --- 2011-04-29.orig/xen/arch/x86/domain.c
> +++ 2011-04-29/xen/arch/x86/domain.c
> @@ -614,26 +614,16 @@ int arch_domain_create(struct domain *d,
>          memset(d->arch.pirq_irq, 0,
>                 d->nr_pirqs * sizeof(*d->arch.pirq_irq));
>  
> -        d->arch.irq_pirq = xmalloc_array(int, nr_irqs);
> -        if ( !d->arch.irq_pirq )
> +        if ( (rc = init_domain_irq_mapping(d)) != 0 )
>              goto fail;
> -        memset(d->arch.irq_pirq, 0,
> -               nr_irqs * sizeof(*d->arch.irq_pirq));
> -
> -        for ( i = 1; platform_legacy_irq(i); ++i )
> -            if ( !IO_APIC_IRQ(i) )
> -                d->arch.irq_pirq[i] = d->arch.pirq_irq[i] = i;
>  
>          if ( is_hvm_domain(d) )
>          {
>              d->arch.pirq_emuirq = xmalloc_array(int, d->nr_pirqs);
> -            d->arch.emuirq_pirq = xmalloc_array(int, nr_irqs);
> -            if ( !d->arch.pirq_emuirq || !d->arch.emuirq_pirq )
> +            if ( !d->arch.pirq_emuirq )
>                  goto fail;
>              for (i = 0; i < d->nr_pirqs; i++)
>                  d->arch.pirq_emuirq[i] = IRQ_UNBOUND;
> -            for (i = 0; i < nr_irqs; i++)
> -                d->arch.emuirq_pirq[i] = IRQ_UNBOUND;
>          }
>  
>  
> @@ -671,9 +661,8 @@ int arch_domain_create(struct domain *d,
>      d->is_dying = DOMDYING_dead;
>      vmce_destroy_msr(d);
>      xfree(d->arch.pirq_irq);
> -    xfree(d->arch.irq_pirq);
>      xfree(d->arch.pirq_emuirq);
> -    xfree(d->arch.emuirq_pirq);
> +    cleanup_domain_irq_mapping(d);
>      free_xenheap_page(d->shared_info);
>      if ( paging_initialised )
>          paging_final_teardown(d);
> @@ -726,9 +715,8 @@ void arch_domain_destroy(struct domain *
>  
>      free_xenheap_page(d->shared_info);
>      xfree(d->arch.pirq_irq);
> -    xfree(d->arch.irq_pirq);
>      xfree(d->arch.pirq_emuirq);
> -    xfree(d->arch.emuirq_pirq);
> +    cleanup_domain_irq_mapping(d);
>  }
>  
>  unsigned long pv_guest_cr4_fixup(const struct vcpu *v, unsigned long
> guest_cr4)
> --- 2011-04-29.orig/xen/arch/x86/irq.c
> +++ 2011-04-29/xen/arch/x86/irq.c
> @@ -950,6 +950,64 @@ struct irq_desc *domain_spin_lock_irq_de
>      return desc;
>  }
>  
> +static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +{
> +    int err = radix_tree_insert(&d->arch.irq_pirq, irq, NULL,
> +                                NULL, NULL);
> +
> +    return err != -EEXIST ? err : 0;
> +}
> +
> +static void set_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +{
> +    *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void *)(long)pirq;
> +    d->arch.pirq_irq[pirq] = irq;
> +}
> +
> +static void clear_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +{
> +    d->arch.pirq_irq[pirq] = 0;
> +    *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = NULL;
> +}
> +
> +static void cleanup_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +{
> +    radix_tree_delete(&d->arch.irq_pirq, irq, NULL);
> +}
> +
> +int init_domain_irq_mapping(struct domain *d)
> +{
> +    unsigned int i;
> +    int err = 0;
> +
> +    INIT_RADIX_TREE(&d->arch.irq_pirq, 0);
> +    if ( is_hvm_domain(d) )
> +        INIT_RADIX_TREE(&d->arch.hvm_domain.emuirq_pirq, 0);
> +
> +    for ( i = 1; platform_legacy_irq(i); ++i )
> +        if ( !IO_APIC_IRQ(i) )
> +        {
> +            err = prepare_domain_irq_pirq(d, i, i);
> +            if ( err )
> +                break;
> +            set_domain_irq_pirq(d, i, i);
> +        }
> +
> +    return err;
> +}
> +
> +static void irq_slot_free(void *unused)
> +{
> +}
> +
> +void cleanup_domain_irq_mapping(struct domain *d)
> +{
> +    radix_tree_destroy(&d->arch.irq_pirq, irq_slot_free, NULL);
> +    if ( is_hvm_domain(d) )
> +        radix_tree_destroy(&d->arch.hvm_domain.emuirq_pirq,
> +                           irq_slot_free, NULL);
> +}
> +
>  /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
>  static void flush_ready_eoi(void)
>  {
> @@ -1373,7 +1431,7 @@ void pirq_guest_unbind(struct domain *d,
>  {
>      irq_guest_action_t *oldaction = NULL;
>      struct irq_desc *desc;
> -    int irq;
> +    int irq = 0;
>  
>      WARN_ON(!spin_is_locked(&d->event_lock));
>  
> @@ -1386,7 +1444,7 @@ void pirq_guest_unbind(struct domain *d,
>          BUG_ON(irq <= 0);
>          desc = irq_to_desc(irq);
>          spin_lock_irq(&desc->lock);
> -        d->arch.pirq_irq[pirq] = d->arch.irq_pirq[irq] = 0;
> +        clear_domain_irq_pirq(d, irq, pirq);
>      }
>      else
>      {
> @@ -1400,6 +1458,8 @@ void pirq_guest_unbind(struct domain *d,
>          kill_timer(&oldaction->eoi_timer);
>          xfree(oldaction);
>      }
> +    else if ( irq > 0 )
> +        cleanup_domain_irq_pirq(d, irq, pirq);
>  }
>  
>  static int pirq_guest_force_unbind(struct domain *d, int irq)
> @@ -1523,6 +1583,10 @@ int map_domain_pirq(
>          return ret;
>      }
>  
> +    ret = prepare_domain_irq_pirq(d, irq, pirq);
> +    if ( ret )
> +        return ret;
> +
>      desc = irq_to_desc(irq);
>  
>      if ( type == MAP_PIRQ_TYPE_MSI )
> @@ -1544,19 +1608,20 @@ int map_domain_pirq(
>              dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
>                d->domain_id, irq);
>          desc->handler = &pci_msi_type;
> -        d->arch.pirq_irq[pirq] = irq;
> -        d->arch.irq_pirq[irq] = pirq;
> +        set_domain_irq_pirq(d, irq, pirq);
>          setup_msi_irq(pdev, msi_desc, irq);
>          spin_unlock_irqrestore(&desc->lock, flags);
> -    } else
> +    }
> +    else
>      {
>          spin_lock_irqsave(&desc->lock, flags);
> -        d->arch.pirq_irq[pirq] = irq;
> -        d->arch.irq_pirq[irq] = pirq;
> +        set_domain_irq_pirq(d, irq, pirq);
>          spin_unlock_irqrestore(&desc->lock, flags);
>      }
>  
>   done:
> +    if ( ret )
> +        cleanup_domain_irq_pirq(d, irq, pirq);
>      return ret;
>  }
>  
> @@ -1599,20 +1664,20 @@ int unmap_domain_pirq(struct domain *d,
>      BUG_ON(irq != domain_pirq_to_irq(d, pirq));
>  
>      if ( !forced_unbind )
> -    {
> -        d->arch.pirq_irq[pirq] = 0;
> -        d->arch.irq_pirq[irq] = 0;
> -    }
> +        clear_domain_irq_pirq(d, irq, pirq);
>      else
>      {
>          d->arch.pirq_irq[pirq] = -irq;
> -        d->arch.irq_pirq[irq] = -pirq;
> +        *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void
> *)(long)-pirq;
>      }
>  
>      spin_unlock_irqrestore(&desc->lock, flags);
>      if (msi_desc)
>          msi_free_irq(msi_desc);
>  
> +    if ( !forced_unbind )
> +        cleanup_domain_irq_pirq(d, irq, pirq);
> +
>      ret = irq_deny_access(d, pirq);
>      if ( ret )
>          dprintk(XENLOG_G_ERR, "dom%d: could not deny access to irq %d\n",
> @@ -1829,10 +1894,25 @@ int map_domain_emuirq_pirq(struct domain
>          return 0;
>      }
>  
> -    d->arch.pirq_emuirq[pirq] = emuirq;
>      /* do not store emuirq mappings for pt devices */
>      if ( emuirq != IRQ_PT )
> -        d->arch.emuirq_pirq[emuirq] = pirq;
> +    {
> +        int err = radix_tree_insert(&d->arch.hvm_domain.emuirq_pirq, emuirq,
> +                                    (void *)((long)pirq + 1), NULL, NULL);
> +
> +        switch ( err )
> +        {
> +        case 0:
> +            break;
> +        case -EEXIST:
> +            *radix_tree_lookup_slot(&d->arch.hvm_domain.emuirq_pirq, emuirq)
> =
> +                (void *)((long)pirq + 1);
> +            break;
> +        default:
> +            return err;
> +        }
> +    }
> +    d->arch.pirq_emuirq[pirq] = emuirq;
>  
>      return 0;
>  }
> @@ -1860,7 +1940,7 @@ int unmap_domain_pirq_emuirq(struct doma
>  
>      d->arch.pirq_emuirq[pirq] = IRQ_UNBOUND;
>      if ( emuirq != IRQ_PT )
> -        d->arch.emuirq_pirq[emuirq] = IRQ_UNBOUND;
> +        radix_tree_delete(&d->arch.hvm_domain.emuirq_pirq, emuirq, NULL);
>  
>   done:
>      return ret;
> --- 2011-04-29.orig/xen/common/radix-tree.c
> +++ 2011-04-29/xen/common/radix-tree.c
> @@ -26,7 +26,6 @@
>   * o tagging code removed
>   * o radix_tree_insert has func parameter for dynamic data struct allocation
>   * o radix_tree_destroy added (including recursive helper function)
> - * o __init functions must be called explicitly
>   * o other include files adapted to Xen
>   */
>  
> @@ -35,6 +34,7 @@
>  #include <xen/lib.h>
>  #include <xen/types.h>
>  #include <xen/errno.h>
> +#include <xen/xmalloc.h>
>  #include <xen/radix-tree.h>
>  #include <asm/cache.h>
>  
> @@ -49,6 +49,18 @@ static inline unsigned long radix_tree_m
>      return height_to_maxindex[height];
>  }
>  
> +static struct radix_tree_node *_node_alloc(void *unused)
> +{
> +    struct radix_tree_node *node = xmalloc(struct radix_tree_node);
> +
> +    return node ? memset(node, 0, sizeof(*node)) : node;
> +}
> +
> +static void _node_free(struct radix_tree_node *node)
> +{
> +    xfree(node);
> +}
> +
>  /*
>   * Extend a radix tree so it can store key @index.
>   */
> @@ -100,6 +112,9 @@ int radix_tree_insert(struct radix_tree_
>      int offset;
>      int error;
>  
> +    if (!node_alloc)
> +        node_alloc = _node_alloc;
> +
>      /* Make sure the tree is high enough.  */
>      if (index > radix_tree_maxindex(root->height)) {
>          error = radix_tree_extend(root, index, node_alloc, arg);
> @@ -336,6 +351,9 @@ void *radix_tree_delete(struct radix_tre
>      unsigned int height, shift;
>      int offset;
>  
> +    if (!node_free)
> +        node_free = _node_free;
> +
>      height = root->height;
>      if (index > radix_tree_maxindex(height))
>          goto out;
> @@ -420,6 +438,8 @@ void radix_tree_destroy(struct radix_tre
>      if (root->height == 0)
>          slot_free(root->rnode);
>      else {
> +        if (!node_free)
> +            node_free = _node_free;
>          radix_tree_node_destroy(root->rnode, root->height,
>                                  slot_free, node_free);
>          node_free(root->rnode);
> @@ -440,10 +460,14 @@ static unsigned long __init __maxindex(u
>      return index;
>  }
>  
> -void __init radix_tree_init(void)
> +static int __init radix_tree_init(void)
>  {
>      unsigned int i;
>  
>      for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
>          height_to_maxindex[i] = __maxindex(i);
> +
> +    return 0;
>  }
> +/* pre-SMP just so it runs before 'normal' initcalls */
> +presmp_initcall(radix_tree_init);
> --- 2011-04-29.orig/xen/common/tmem.c
> +++ 2011-04-29/xen/common/tmem.c
> @@ -2925,7 +2925,6 @@ static int __init init_tmem(void)
>      if ( !tmh_enabled() )
>          return 0;
>  
> -    radix_tree_init();
>      if ( tmh_dedup_enabled() )
>          for (i = 0; i < 256; i++ )
>          {
> --- 2011-04-29.orig/xen/include/asm-x86/domain.h
> +++ 2011-04-29/xen/include/asm-x86/domain.h
> @@ -3,6 +3,7 @@
>  
>  #include <xen/config.h>
>  #include <xen/mm.h>
> +#include <xen/radix-tree.h>
>  #include <asm/hvm/vcpu.h>
>  #include <asm/hvm/domain.h>
>  #include <asm/e820.h>
> @@ -284,10 +285,9 @@ struct arch_domain
>      const char *nested_p2m_function;
>  
>      /* NB. protected by d->event_lock and by irq_desc[irq].lock */
> -    int *irq_pirq;
> +    struct radix_tree_root irq_pirq;
>      int *pirq_irq;
> -    /* pirq to emulated irq and vice versa */
> -    int *emuirq_pirq;
> +    /* pirq to emulated irq */
>      int *pirq_emuirq;
>  
>      /* Maximum physical-address bitwidth supported by this guest. */
> --- 2011-04-29.orig/xen/include/asm-x86/hvm/domain.h
> +++ 2011-04-29/xen/include/asm-x86/hvm/domain.h
> @@ -59,6 +59,9 @@ struct hvm_domain {
>      /* VCPU which is current target for 8259 interrupts. */
>      struct vcpu           *i8259_target;
>  
> +    /* emulated irq to pirq */
> +    struct radix_tree_root emuirq_pirq;
> +
>      /* hvm_print_line() logging. */
>  #define HVM_PBUF_SIZE 80
>      char                  *pbuf;
> --- 2011-04-29.orig/xen/include/asm-x86/irq.h
> +++ 2011-04-29/xen/include/asm-x86/irq.h
> @@ -143,11 +143,17 @@ int bind_irq_vector(int irq, int vector,
>  
>  void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);
>  
> +int init_domain_irq_mapping(struct domain *);
> +void cleanup_domain_irq_mapping(struct domain *);
> +
>  #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
> -#define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
> +#define domain_irq_to_pirq(d, irq) \
> +    ((long)radix_tree_lookup(&(d)->arch.irq_pirq, irq))
>  #define PIRQ_ALLOCATED -1
>  #define domain_pirq_to_emuirq(d, pirq) ((d)->arch.pirq_emuirq[pirq])
> -#define domain_emuirq_to_pirq(d, emuirq) ((d)->arch.emuirq_pirq[emuirq])
> +#define domain_emuirq_to_pirq(d, emuirq) \
> +    (((long)radix_tree_lookup(&(d)->arch.hvm_domain.emuirq_pirq, emuirq) ?: \
> +     IRQ_UNBOUND + 1) - 1)
>  #define IRQ_UNBOUND -1
>  #define IRQ_PT -2
>  
> --- 2011-04-29.orig/xen/include/xen/radix-tree.h
> +++ 2011-04-29/xen/include/xen/radix-tree.h
> @@ -73,6 +73,5 @@ void *radix_tree_delete(struct radix_tre
>  unsigned int
>  radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
>                         unsigned long first_index, unsigned int max_items);
> -void radix_tree_init(void);
>  
>  #endif /* _XEN_RADIX_TREE_H */
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.