WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain ar

To: Jan Beulich <JBeulich@xxxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees
From: Keir Fraser <keir@xxxxxxx>
Date: Tue, 03 May 2011 22:08:34 +0100
Cc: Allen M Kay <allen.m.kay@xxxxxxxxx>
Delivery-date: Tue, 03 May 2011 14:09:25 -0700
Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:sender:user-agent:date:subject:from:to:cc :message-id:thread-topic:thread-index:in-reply-to:mime-version :content-type:content-transfer-encoding; bh=8vfEmH/mesKfGcFIKFHa5ExX0AW9I/VvNDyB/Atp7I8=; b=VZ4zAq5A5Y04O8CU+o2RKNEQtJd9mKsXMcfFTQt8GLcd8ukfohV7Pdym2s34m8x2BP IVGekyJylZqNoFDeZfYV0dSm7+bTypBLLvNoIVuswfP1DRoibnuYHqQfpfpIaK3lHWoI i9KvAfBG4pwPIjxV0m+9aCzKKWT/nCsH9W/X4=
Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=sender:user-agent:date:subject:from:to:cc:message-id:thread-topic :thread-index:in-reply-to:mime-version:content-type :content-transfer-encoding; b=SsegtLfHTSIpiGxxMYjP331Hps9Me5okKMqPF8+YeK/9aPzXrh2bLta4a2GesMSQoH BTmA/e3XX4Xp5KUtcQGh0Ib94I06dR4UrnzL6CC/ho2+zjcA75MzwlZzf039lvMHq75X 80V5dSx5/4zx5FESslvY9Fin0U6UUtwbARehA=
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <4DC02878020000780003F690@xxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AcwJ1kMUJMmam3PrMEC8xfc6rwciwA==
Thread-topic: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees
User-agent: Microsoft-Entourage/12.29.0.110113
On 03/05/2011 15:08, "Jan Beulich" <JBeulich@xxxxxxxxxx> wrote:

> It would seem possible to fold the two trees into one (making e.g. the
> emuirq bits stored in the upper half of the pointer), but I'm not
> certain that's worth it as it would make deletion of entries more
> cumbersome. Unless pirq-s and emuirq-s were mutually exclusive...
> 
> v2: Split setup/teardown into two stages - (de-)allocation (tree node
> (de-)population) is done with just d->event_lock held (and hence
> interrupts enabled), while actual insertion/removal of translation data
> gets done with irq_desc's lock held (and interrupts disabled).

This is mostly okay, because the only operations that occur with irqs
disabled are read-only on the radix-rtree structure itself, hence no
alloc/dealloc will happen. *However* those calls to
radix_tree_lookup[_slot]() are not synchronised wrt your calls to
radix_tree_{insert,delete}(). The latter hold d->event_lock, while the
former do not. Hence you need RCU and you need a new first patch in your
patch set to pull in a modern radix-tree.[ch] from upstream Linux.

 -- keir

> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
> 
> --- 2011-04-29.orig/xen/arch/x86/domain.c
> +++ 2011-04-29/xen/arch/x86/domain.c
> @@ -614,26 +614,16 @@ int arch_domain_create(struct domain *d,
>          memset(d->arch.pirq_irq, 0,
>                 d->nr_pirqs * sizeof(*d->arch.pirq_irq));
>  
> -        d->arch.irq_pirq = xmalloc_array(int, nr_irqs);
> -        if ( !d->arch.irq_pirq )
> +        if ( (rc = init_domain_irq_mapping(d)) != 0 )
>              goto fail;
> -        memset(d->arch.irq_pirq, 0,
> -               nr_irqs * sizeof(*d->arch.irq_pirq));
> -
> -        for ( i = 1; platform_legacy_irq(i); ++i )
> -            if ( !IO_APIC_IRQ(i) )
> -                d->arch.irq_pirq[i] = d->arch.pirq_irq[i] = i;
>  
>          if ( is_hvm_domain(d) )
>          {
>              d->arch.pirq_emuirq = xmalloc_array(int, d->nr_pirqs);
> -            d->arch.emuirq_pirq = xmalloc_array(int, nr_irqs);
> -            if ( !d->arch.pirq_emuirq || !d->arch.emuirq_pirq )
> +            if ( !d->arch.pirq_emuirq )
>                  goto fail;
>              for (i = 0; i < d->nr_pirqs; i++)
>                  d->arch.pirq_emuirq[i] = IRQ_UNBOUND;
> -            for (i = 0; i < nr_irqs; i++)
> -                d->arch.emuirq_pirq[i] = IRQ_UNBOUND;
>          }
>  
>  
> @@ -671,9 +661,8 @@ int arch_domain_create(struct domain *d,
>      d->is_dying = DOMDYING_dead;
>      vmce_destroy_msr(d);
>      xfree(d->arch.pirq_irq);
> -    xfree(d->arch.irq_pirq);
>      xfree(d->arch.pirq_emuirq);
> -    xfree(d->arch.emuirq_pirq);
> +    cleanup_domain_irq_mapping(d);
>      free_xenheap_page(d->shared_info);
>      if ( paging_initialised )
>          paging_final_teardown(d);
> @@ -726,9 +715,8 @@ void arch_domain_destroy(struct domain *
>  
>      free_xenheap_page(d->shared_info);
>      xfree(d->arch.pirq_irq);
> -    xfree(d->arch.irq_pirq);
>      xfree(d->arch.pirq_emuirq);
> -    xfree(d->arch.emuirq_pirq);
> +    cleanup_domain_irq_mapping(d);
>  }
>  
>  unsigned long pv_guest_cr4_fixup(const struct vcpu *v, unsigned long
> guest_cr4)
> --- 2011-04-29.orig/xen/arch/x86/irq.c
> +++ 2011-04-29/xen/arch/x86/irq.c
> @@ -950,6 +950,64 @@ struct irq_desc *domain_spin_lock_irq_de
>      return desc;
>  }
>  
> +static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +{
> +    int err = radix_tree_insert(&d->arch.irq_pirq, irq, NULL,
> +                                NULL, NULL);
> +
> +    return err != -EEXIST ? err : 0;
> +}
> +
> +static void set_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +{
> +    *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void *)(long)pirq;
> +    d->arch.pirq_irq[pirq] = irq;
> +}
> +
> +static void clear_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +{
> +    d->arch.pirq_irq[pirq] = 0;
> +    *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = NULL;
> +}
> +
> +static void cleanup_domain_irq_pirq(struct domain *d, int irq, int pirq)
> +{
> +    radix_tree_delete(&d->arch.irq_pirq, irq, NULL);
> +}
> +
> +int init_domain_irq_mapping(struct domain *d)
> +{
> +    unsigned int i;
> +    int err = 0;
> +
> +    INIT_RADIX_TREE(&d->arch.irq_pirq, 0);
> +    if ( is_hvm_domain(d) )
> +        INIT_RADIX_TREE(&d->arch.hvm_domain.emuirq_pirq, 0);
> +
> +    for ( i = 1; platform_legacy_irq(i); ++i )
> +        if ( !IO_APIC_IRQ(i) )
> +        {
> +            err = prepare_domain_irq_pirq(d, i, i);
> +            if ( err )
> +                break;
> +            set_domain_irq_pirq(d, i, i);
> +        }
> +
> +    return err;
> +}
> +
> +static void irq_slot_free(void *unused)
> +{
> +}
> +
> +void cleanup_domain_irq_mapping(struct domain *d)
> +{
> +    radix_tree_destroy(&d->arch.irq_pirq, irq_slot_free, NULL);
> +    if ( is_hvm_domain(d) )
> +        radix_tree_destroy(&d->arch.hvm_domain.emuirq_pirq,
> +                           irq_slot_free, NULL);
> +}
> +
>  /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
>  static void flush_ready_eoi(void)
>  {
> @@ -1373,7 +1431,7 @@ void pirq_guest_unbind(struct domain *d,
>  {
>      irq_guest_action_t *oldaction = NULL;
>      struct irq_desc *desc;
> -    int irq;
> +    int irq = 0;
>  
>      WARN_ON(!spin_is_locked(&d->event_lock));
>  
> @@ -1386,7 +1444,7 @@ void pirq_guest_unbind(struct domain *d,
>          BUG_ON(irq <= 0);
>          desc = irq_to_desc(irq);
>          spin_lock_irq(&desc->lock);
> -        d->arch.pirq_irq[pirq] = d->arch.irq_pirq[irq] = 0;
> +        clear_domain_irq_pirq(d, irq, pirq);
>      }
>      else
>      {
> @@ -1400,6 +1458,8 @@ void pirq_guest_unbind(struct domain *d,
>          kill_timer(&oldaction->eoi_timer);
>          xfree(oldaction);
>      }
> +    else if ( irq > 0 )
> +        cleanup_domain_irq_pirq(d, irq, pirq);
>  }
>  
>  static int pirq_guest_force_unbind(struct domain *d, int irq)
> @@ -1523,6 +1583,10 @@ int map_domain_pirq(
>          return ret;
>      }
>  
> +    ret = prepare_domain_irq_pirq(d, irq, pirq);
> +    if ( ret )
> +        return ret;
> +
>      desc = irq_to_desc(irq);
>  
>      if ( type == MAP_PIRQ_TYPE_MSI )
> @@ -1544,19 +1608,20 @@ int map_domain_pirq(
>              dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
>                d->domain_id, irq);
>          desc->handler = &pci_msi_type;
> -        d->arch.pirq_irq[pirq] = irq;
> -        d->arch.irq_pirq[irq] = pirq;
> +        set_domain_irq_pirq(d, irq, pirq);
>          setup_msi_irq(pdev, msi_desc, irq);
>          spin_unlock_irqrestore(&desc->lock, flags);
> -    } else
> +    }
> +    else
>      {
>          spin_lock_irqsave(&desc->lock, flags);
> -        d->arch.pirq_irq[pirq] = irq;
> -        d->arch.irq_pirq[irq] = pirq;
> +        set_domain_irq_pirq(d, irq, pirq);
>          spin_unlock_irqrestore(&desc->lock, flags);
>      }
>  
>   done:
> +    if ( ret )
> +        cleanup_domain_irq_pirq(d, irq, pirq);
>      return ret;
>  }
>  
> @@ -1599,20 +1664,20 @@ int unmap_domain_pirq(struct domain *d,
>      BUG_ON(irq != domain_pirq_to_irq(d, pirq));
>  
>      if ( !forced_unbind )
> -    {
> -        d->arch.pirq_irq[pirq] = 0;
> -        d->arch.irq_pirq[irq] = 0;
> -    }
> +        clear_domain_irq_pirq(d, irq, pirq);
>      else
>      {
>          d->arch.pirq_irq[pirq] = -irq;
> -        d->arch.irq_pirq[irq] = -pirq;
> +        *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void
> *)(long)-pirq;
>      }
>  
>      spin_unlock_irqrestore(&desc->lock, flags);
>      if (msi_desc)
>          msi_free_irq(msi_desc);
>  
> +    if ( !forced_unbind )
> +        cleanup_domain_irq_pirq(d, irq, pirq);
> +
>      ret = irq_deny_access(d, pirq);
>      if ( ret )
>          dprintk(XENLOG_G_ERR, "dom%d: could not deny access to irq %d\n",
> @@ -1829,10 +1894,25 @@ int map_domain_emuirq_pirq(struct domain
>          return 0;
>      }
>  
> -    d->arch.pirq_emuirq[pirq] = emuirq;
>      /* do not store emuirq mappings for pt devices */
>      if ( emuirq != IRQ_PT )
> -        d->arch.emuirq_pirq[emuirq] = pirq;
> +    {
> +        int err = radix_tree_insert(&d->arch.hvm_domain.emuirq_pirq, emuirq,
> +                                    (void *)((long)pirq + 1), NULL, NULL);
> +
> +        switch ( err )
> +        {
> +        case 0:
> +            break;
> +        case -EEXIST:
> +            *radix_tree_lookup_slot(&d->arch.hvm_domain.emuirq_pirq, emuirq)
> =
> +                (void *)((long)pirq + 1);
> +            break;
> +        default:
> +            return err;
> +        }
> +    }
> +    d->arch.pirq_emuirq[pirq] = emuirq;
>  
>      return 0;
>  }
> @@ -1860,7 +1940,7 @@ int unmap_domain_pirq_emuirq(struct doma
>  
>      d->arch.pirq_emuirq[pirq] = IRQ_UNBOUND;
>      if ( emuirq != IRQ_PT )
> -        d->arch.emuirq_pirq[emuirq] = IRQ_UNBOUND;
> +        radix_tree_delete(&d->arch.hvm_domain.emuirq_pirq, emuirq, NULL);
>  
>   done:
>      return ret;
> --- 2011-04-29.orig/xen/common/radix-tree.c
> +++ 2011-04-29/xen/common/radix-tree.c
> @@ -26,7 +26,6 @@
>   * o tagging code removed
>   * o radix_tree_insert has func parameter for dynamic data struct allocation
>   * o radix_tree_destroy added (including recursive helper function)
> - * o __init functions must be called explicitly
>   * o other include files adapted to Xen
>   */
>  
> @@ -35,6 +34,7 @@
>  #include <xen/lib.h>
>  #include <xen/types.h>
>  #include <xen/errno.h>
> +#include <xen/xmalloc.h>
>  #include <xen/radix-tree.h>
>  #include <asm/cache.h>
>  
> @@ -49,6 +49,18 @@ static inline unsigned long radix_tree_m
>      return height_to_maxindex[height];
>  }
>  
> +static struct radix_tree_node *_node_alloc(void *unused)
> +{
> +    struct radix_tree_node *node = xmalloc(struct radix_tree_node);
> +
> +    return node ? memset(node, 0, sizeof(*node)) : node;
> +}
> +
> +static void _node_free(struct radix_tree_node *node)
> +{
> +    xfree(node);
> +}
> +
>  /*
>   * Extend a radix tree so it can store key @index.
>   */
> @@ -100,6 +112,9 @@ int radix_tree_insert(struct radix_tree_
>      int offset;
>      int error;
>  
> +    if (!node_alloc)
> +        node_alloc = _node_alloc;
> +
>      /* Make sure the tree is high enough.  */
>      if (index > radix_tree_maxindex(root->height)) {
>          error = radix_tree_extend(root, index, node_alloc, arg);
> @@ -336,6 +351,9 @@ void *radix_tree_delete(struct radix_tre
>      unsigned int height, shift;
>      int offset;
>  
> +    if (!node_free)
> +        node_free = _node_free;
> +
>      height = root->height;
>      if (index > radix_tree_maxindex(height))
>          goto out;
> @@ -420,6 +438,8 @@ void radix_tree_destroy(struct radix_tre
>      if (root->height == 0)
>          slot_free(root->rnode);
>      else {
> +        if (!node_free)
> +            node_free = _node_free;
>          radix_tree_node_destroy(root->rnode, root->height,
>                                  slot_free, node_free);
>          node_free(root->rnode);
> @@ -440,10 +460,14 @@ static unsigned long __init __maxindex(u
>      return index;
>  }
>  
> -void __init radix_tree_init(void)
> +static int __init radix_tree_init(void)
>  {
>      unsigned int i;
>  
>      for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
>          height_to_maxindex[i] = __maxindex(i);
> +
> +    return 0;
>  }
> +/* pre-SMP just so it runs before 'normal' initcalls */
> +presmp_initcall(radix_tree_init);
> --- 2011-04-29.orig/xen/common/tmem.c
> +++ 2011-04-29/xen/common/tmem.c
> @@ -2925,7 +2925,6 @@ static int __init init_tmem(void)
>      if ( !tmh_enabled() )
>          return 0;
>  
> -    radix_tree_init();
>      if ( tmh_dedup_enabled() )
>          for (i = 0; i < 256; i++ )
>          {
> --- 2011-04-29.orig/xen/include/asm-x86/domain.h
> +++ 2011-04-29/xen/include/asm-x86/domain.h
> @@ -3,6 +3,7 @@
>  
>  #include <xen/config.h>
>  #include <xen/mm.h>
> +#include <xen/radix-tree.h>
>  #include <asm/hvm/vcpu.h>
>  #include <asm/hvm/domain.h>
>  #include <asm/e820.h>
> @@ -284,10 +285,9 @@ struct arch_domain
>      const char *nested_p2m_function;
>  
>      /* NB. protected by d->event_lock and by irq_desc[irq].lock */
> -    int *irq_pirq;
> +    struct radix_tree_root irq_pirq;
>      int *pirq_irq;
> -    /* pirq to emulated irq and vice versa */
> -    int *emuirq_pirq;
> +    /* pirq to emulated irq */
>      int *pirq_emuirq;
>  
>      /* Maximum physical-address bitwidth supported by this guest. */
> --- 2011-04-29.orig/xen/include/asm-x86/hvm/domain.h
> +++ 2011-04-29/xen/include/asm-x86/hvm/domain.h
> @@ -59,6 +59,9 @@ struct hvm_domain {
>      /* VCPU which is current target for 8259 interrupts. */
>      struct vcpu           *i8259_target;
>  
> +    /* emulated irq to pirq */
> +    struct radix_tree_root emuirq_pirq;
> +
>      /* hvm_print_line() logging. */
>  #define HVM_PBUF_SIZE 80
>      char                  *pbuf;
> --- 2011-04-29.orig/xen/include/asm-x86/irq.h
> +++ 2011-04-29/xen/include/asm-x86/irq.h
> @@ -143,11 +143,17 @@ int bind_irq_vector(int irq, int vector,
>  
>  void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);
>  
> +int init_domain_irq_mapping(struct domain *);
> +void cleanup_domain_irq_mapping(struct domain *);
> +
>  #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
> -#define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
> +#define domain_irq_to_pirq(d, irq) \
> +    ((long)radix_tree_lookup(&(d)->arch.irq_pirq, irq))
>  #define PIRQ_ALLOCATED -1
>  #define domain_pirq_to_emuirq(d, pirq) ((d)->arch.pirq_emuirq[pirq])
> -#define domain_emuirq_to_pirq(d, emuirq) ((d)->arch.emuirq_pirq[emuirq])
> +#define domain_emuirq_to_pirq(d, emuirq) \
> +    (((long)radix_tree_lookup(&(d)->arch.hvm_domain.emuirq_pirq, emuirq) ?: \
> +     IRQ_UNBOUND + 1) - 1)
>  #define IRQ_UNBOUND -1
>  #define IRQ_PT -2
>  
> --- 2011-04-29.orig/xen/include/xen/radix-tree.h
> +++ 2011-04-29/xen/include/xen/radix-tree.h
> @@ -73,6 +73,5 @@ void *radix_tree_delete(struct radix_tre
>  unsigned int
>  radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
>                         unsigned long first_index, unsigned int max_items);
> -void radix_tree_init(void);
>  
>  #endif /* _XEN_RADIX_TREE_H */
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel