WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-ia64-devel

Re: [Xen-ia64-devel][PATCH][VTD] small patches for VTD

To: "Xu, Anthony" <anthony.xu@xxxxxxxxx>
Subject: Re: [Xen-ia64-devel][PATCH][VTD] small patches for VTD
From: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
Date: Wed, 22 Oct 2008 14:32:01 +0900
Cc: xen-ia64-devel <xen-ia64-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Tue, 21 Oct 2008 22:32:06 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <F7C8A4D3A9905B45A80E4C194793FA6501B3130ED5@xxxxxxxxxxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-ia64-devel-request@lists.xensource.com?subject=help>
List-id: Discussion of the ia64 port of Xen <xen-ia64-devel.lists.xensource.com>
List-post: <mailto:xen-ia64-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-ia64-devel>, <mailto:xen-ia64-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-ia64-devel>, <mailto:xen-ia64-devel-request@lists.xensource.com?subject=unsubscribe>
References: <F7C8A4D3A9905B45A80E4C194793FA6501B3130ED5@xxxxxxxxxxxxxxxxxxxxxxxxxxxx>
Sender: xen-ia64-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.6i
[I added Kyouya and Akio to CC for comments on the hunk in vtlb.c]


On Tue, Oct 21, 2008 at 07:38:21PM +0800, Xu, Anthony wrote:
> other small patches for VTD
> 
> Signed-off-by: Anthony Xu <anthony.xu@xxxxxxxxx>
> 
> 
> This patch is based on #Cset 18673 of xen-devel
> 
> Thanks,
> Anthony


The patch touches the common codes for both PV and HVM domain
without any hvm domain check.
Before you told the target was only hvm domain (at this moment).
Hmm, what is the exact target the series of patch.
(Off course, eventually you may want to support all the cases.)

Some comments below.

> other small patches for VTD
> 
> Signed-off-by: Anthony Xu <anthony.xu@xxxxxxxxx>
> 
> 
> 
> diff -r e1ed3e5cd001 xen/arch/ia64/linux-xen/acpi.c
> --- a/xen/arch/ia64/linux-xen/acpi.c  Tue Oct 21 18:55:22 2008 +0800
> +++ b/xen/arch/ia64/linux-xen/acpi.c  Tue Oct 21 19:13:45 2008 +0800
> @@ -797,6 +797,10 @@
>       if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt))
>               printk(KERN_ERR PREFIX "Can't find FADT\n");
>  
> +#ifdef XEN
> +     acpi_dmar_init();
> +#endif
> +
>  #ifdef CONFIG_SMP
>       if (available_cpus == 0) {
>               printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
> diff -r e1ed3e5cd001 xen/arch/ia64/vmx/viosapic.c
> --- a/xen/arch/ia64/vmx/viosapic.c    Tue Oct 21 18:55:22 2008 +0800
> +++ b/xen/arch/ia64/vmx/viosapic.c    Tue Oct 21 19:13:45 2008 +0800
> @@ -121,6 +121,13 @@
>                       redir_num, vector);
>          return;
>      }
> +    if ( iommu_enabled )
> +    {
> +        spin_unlock(&viosapic->lock);
> +        hvm_dpci_eoi(current->domain, redir_num, 
> &viosapic->redirtbl[redir_num]);
> +        spin_lock(&viosapic->lock);
> +    }
> +
>      service_iosapic(viosapic);
>      spin_unlock(&viosapic->lock);
>  }
> diff -r e1ed3e5cd001 xen/arch/ia64/vmx/vmx_fault.c
> --- a/xen/arch/ia64/vmx/vmx_fault.c   Tue Oct 21 18:55:22 2008 +0800
> +++ b/xen/arch/ia64/vmx/vmx_fault.c   Tue Oct 21 19:13:45 2008 +0800
> @@ -54,6 +54,7 @@
>  #include <asm/shadow.h>
>  #include <asm/sioemu.h>
>  #include <public/arch-ia64/sioemu.h>
> +#include <xen/hvm/irq.h>
>  
>  /* reset all PSR field to 0, except up,mfl,mfh,pk,dt,rt,mc,it */
>  #define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
> @@ -306,6 +307,7 @@
>                  viosapic_set_irq(d, callback_irq, 0);
>              }
>          }
> +        hvm_dirq_assist(v);
>      }
>  
>      rmb();
> diff -r e1ed3e5cd001 xen/arch/ia64/vmx/vtlb.c
> --- a/xen/arch/ia64/vmx/vtlb.c        Tue Oct 21 18:55:22 2008 +0800
> +++ b/xen/arch/ia64/vmx/vtlb.c        Tue Oct 21 19:13:45 2008 +0800
> @@ -522,7 +522,10 @@
>       * which is required by vga acceleration since qemu maps shared
>       * vram buffer with WB.
>       */
> -    if (phy_pte.ma != VA_MATTR_NATPAGE)
> +    /* if a device is assigned to a domain through VTD, the MMIO for this
> +     * device needs to retain to UC attribute
> +     */
> +    if (phy_pte.ma == VA_MATTR_WC)
>          phy_pte.ma = VA_MATTR_WB;
>  

Doesn't this break the intention of the c/s 15134:466f71b1e831?a
To be honest, I'm not sure. Kyouya or Akio, do you have any comments?


>      maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | (paddr & ~PAGE_MASK);
> diff -r e1ed3e5cd001 xen/arch/ia64/xen/domain.c
> --- a/xen/arch/ia64/xen/domain.c      Tue Oct 21 18:55:22 2008 +0800
> +++ b/xen/arch/ia64/xen/domain.c      Tue Oct 21 19:13:45 2008 +0800
> @@ -569,6 +569,7 @@
>       if (is_idle_domain(d))
>           return 0;
>  
> +     INIT_LIST_HEAD(&d->arch.pdev_list);
>       foreign_p2m_init(d);
>  #ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
>       d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt;

This hunk shoundn't be included in this patch.


> @@ -601,6 +602,9 @@
>       if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL)
>           goto fail_nomem;
>  
> +     if(iommu_domain_init(d) != 0)
> +             goto fail_iommu;
> +
>       /*
>        * grant_table_create() can't fully initialize grant table for domain
>        * because it is called before arch_domain_create().
> @@ -617,6 +621,8 @@
>       dprintk(XENLOG_DEBUG, "arch_domain_create: domain=%p\n", d);
>       return 0;
>  
> +fail_iommu:
> +     iommu_domain_destroy(d);
>  fail_nomem:
>       tlb_track_destroy(d);
>  fail_nomem1:
> @@ -635,6 +641,10 @@
>       if (d->shared_info != NULL)
>               free_xenheap_pages(d->shared_info,
>                                  get_order_from_shift(XSI_SHIFT));
> +
> +     pci_release_devices(d);
> +     if( !is_idle_domain(d))
> +             iommu_domain_destroy(d);
>  
>       tlb_track_destroy(d);
>  
> diff -r e1ed3e5cd001 xen/arch/ia64/xen/irq.c
> --- a/xen/arch/ia64/xen/irq.c Tue Oct 21 18:55:22 2008 +0800
> +++ b/xen/arch/ia64/xen/irq.c Tue Oct 21 19:13:45 2008 +0800
> @@ -312,12 +312,25 @@
>      struct domain *guest[IRQ_MAX_GUESTS];
>  } irq_guest_action_t;
>  
> +static struct timer irq_guest_eoi_timer[NR_IRQS];
> +static void irq_guest_eoi_timer_fn(void *data)
> +{
> +     irq_desc_t *desc = data;
> +     unsigned vector = desc - irq_desc;
> +     unsigned long flags;
> +
> +     spin_lock_irqsave(&desc->lock, flags);
> +     desc->status &= ~IRQ_INPROGRESS;
> +     desc->handler->enable(vector);
> +     spin_unlock_irqrestore(&desc->lock, flags);
> +}
> +
>  void __do_IRQ_guest(int irq)
>  {
>      irq_desc_t         *desc = &irq_desc[irq];
>      irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
>      struct domain      *d;
> -    int                 i;
> +    int                 i, already_pending = 0;
>  
>      for ( i = 0; i < action->nr_guests; i++ )
>      {
> @@ -325,8 +338,29 @@
>          if ( (action->ack_type != ACKTYPE_NONE) &&
>               !test_and_set_bit(irq, &d->pirq_mask) )
>              action->in_flight++;
> -        send_guest_pirq(d, irq);
> -    }
> +             if ( hvm_do_IRQ_dpci(d, irq) )
> +             {
> +                     if ( action->ack_type == ACKTYPE_NONE )
> +                     {
> +                             already_pending += !!(desc->status & 
> IRQ_INPROGRESS);
> +                             desc->status |= IRQ_INPROGRESS; /* cleared 
> during hvm eoi */
> +                     }
> +             }
> +             else if ( send_guest_pirq(d, irq) &&
> +                             (action->ack_type == ACKTYPE_NONE) )
> +             {
> +                     already_pending++;
> +             }
> +     }
> +
> +     if ( already_pending == action->nr_guests )
> +     {
> +             desc->handler->disable(irq);
> +             stop_timer(&irq_guest_eoi_timer[irq]);
> +             init_timer(&irq_guest_eoi_timer[irq],
> +                             irq_guest_eoi_timer_fn, desc, 
> smp_processor_id());
> +             set_timer(&irq_guest_eoi_timer[irq], NOW() + MILLISECS(1));
> +     }
>  }
>  
>  int pirq_acktype(int irq)

Is those hunk for interrupt storm avoidance as 17963:1db0b09b290e?
If so, please split it out and send it as another patch.


> diff -r e1ed3e5cd001 xen/arch/ia64/xen/mm.c
> --- a/xen/arch/ia64/xen/mm.c  Tue Oct 21 18:55:22 2008 +0800
> +++ b/xen/arch/ia64/xen/mm.c  Tue Oct 21 19:13:45 2008 +0800
> @@ -189,6 +189,10 @@
>  
>  static void __xencomm_mark_dirty(struct domain *d,
>                                   unsigned long addr, unsigned int len);
> +
> +static void
> +zap_domain_page_one(struct domain *d, unsigned long mpaddr,
> +                    int clear_PGC_allocate, unsigned long mfn);
>  
>  extern unsigned long ia64_iobase;
>  
> @@ -908,20 +912,21 @@
>                       unsigned long flags)
>  {
>      volatile pte_t *pte;
> -    pte_t old_pte;
>      pte_t new_pte;
>      pte_t ret_pte;
>      unsigned long prot = flags_to_prot(flags);
>  
>      pte = lookup_alloc_domain_pte(d, mpaddr);
> +    new_pte = pfn_pte(physaddr >> PAGE_SHIFT, __pgprot(prot));
> +    if(pte_val(new_pte) == pte_val(*pte))
> +        return 0;
>  
> -    old_pte = __pte(0);
> -    new_pte = pfn_pte(physaddr >> PAGE_SHIFT, __pgprot(prot));
> -    ret_pte = ptep_cmpxchg_rel(&d->arch.mm, mpaddr, pte, old_pte, new_pte);
> -    if (pte_val(ret_pte) == pte_val(old_pte)) {
> -        smp_mb();
> -        return 0;
> -    }
> +    /* for assigned MMIO, the old pte may be set to _PAGE_IO attribute,
> +     * so zap it first, then set up it.
> +     */
> +
> +    zap_domain_page_one(d, mpaddr, 1, INVALID_MFN);
> +    ret_pte = ptep_xchg(&d->arch.mm, mpaddr, pte, new_pte);
>  
>      // dom0 tries to map real machine's I/O region, but failed.
>      // It is very likely that dom0 doesn't boot correctly because

Hmmm, are you really sure that the above is SMP-safe?
We are touching p2m table locklessly so we must be extremely
careful. The above hunk split the atomic operation into two phase
and makes the following logic not make sense.


> @@ -1420,10 +1425,12 @@
>          return;
>      if (pte_none(*pte))
>          return;
> -
> +    
>      if (mfn == INVALID_MFN) {
>          // clear pte
>          old_pte = ptep_get_and_clear(mm, mpaddr, pte);
> +        if(!pte_mem(old_pte))
> +            return;
>          mfn = pte_pfn(old_pte);
>      } else {
>          unsigned long old_arflags;
> @@ -1461,6 +1468,7 @@
>      if(!mfn_valid(mfn))
>          return;
>  
> +    iommu_unmap_page(d, mpaddr>>PAGE_SHIFT);

Isn't PAGE_SHIFT_4K loop necessary?


>      page = mfn_to_page(mfn);
>      BUG_ON((page->count_info & PGC_count_mask) == 0);
>  
> @@ -2841,10 +2849,16 @@
>  __guest_physmap_add_page(struct domain *d, unsigned long gpfn,
>                           unsigned long mfn)
>  {
> +    int i, j;
> +
>      set_gpfn_from_mfn(mfn, gpfn);
>      smp_mb();
>      assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn,
>                                 ASSIGN_writable | ASSIGN_pgc_allocated);
> +    j = 1 << (PAGE_SHIFT-PAGE_SHIFT_4K);
> +    for(i = 0 ; i < j; i++)
> +        iommu_map_page(d, gpfn*j + i, mfn*j + i);
> +
>  }
>  
>  int
> diff -r e1ed3e5cd001 xen/include/asm-ia64/linux-xen/asm/acpi.h
> --- a/xen/include/asm-ia64/linux-xen/asm/acpi.h       Tue Oct 21 18:55:22 
> 2008 +0800
> +++ b/xen/include/asm-ia64/linux-xen/asm/acpi.h       Tue Oct 21 19:13:45 
> 2008 +0800
> @@ -38,6 +38,7 @@
>  #include <asm/numa.h>
>  #ifdef XEN
>  #include <xen/nodemask.h>
> +extern int acpi_dmar_init(void);
>  #endif
>  
>  #define COMPILER_DEPENDENT_INT64     long
> diff -r e1ed3e5cd001 xen/include/asm-ia64/linux-xen/asm/iosapic.h
> --- a/xen/include/asm-ia64/linux-xen/asm/iosapic.h    Tue Oct 21 18:55:22 
> 2008 +0800
> +++ b/xen/include/asm-ia64/linux-xen/asm/iosapic.h    Tue Oct 21 19:13:45 
> 2008 +0800
> @@ -83,12 +83,25 @@
>  
>  static inline unsigned int iosapic_read(char __iomem *iosapic, unsigned int 
> reg)
>  {
> +#ifdef XEN
> +     if(vtd_enabled && (reg >= 10)){
> +             int apic = find_iosapic_by_addr((unsigned long)iosapic);
> +             return io_apic_read_remap_rte(apic, reg);
> +     }
> +#endif
>       writel(reg, iosapic + IOSAPIC_REG_SELECT);
>       return readl(iosapic + IOSAPIC_WINDOW);
>  }
>  
>  static inline void iosapic_write(char __iomem *iosapic, unsigned int reg, 
> u32 val)
>  {
> +#ifdef XEN
> +     if (iommu_enabled && (reg >= 10)){
> +             int apic = find_iosapic_by_addr((unsigned long)iosapic);
> +             iommu_update_ire_from_apic(apic, reg, val);
> +             return;
> +     }
> +#endif
>       writel(reg, iosapic + IOSAPIC_REG_SELECT);
>       writel(val, iosapic + IOSAPIC_WINDOW);
>  }
> diff -r e1ed3e5cd001 xen/include/asm-ia64/viosapic.h
> --- a/xen/include/asm-ia64/viosapic.h Tue Oct 21 18:55:22 2008 +0800
> +++ b/xen/include/asm-ia64/viosapic.h Tue Oct 21 19:13:45 2008 +0800
> @@ -70,5 +70,7 @@
>  
>  unsigned long viosapic_read(struct vcpu *v, unsigned long addr,
>                              unsigned long length);
> +void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
> +                          union vioapic_redir_entry *ent);
>  
>  #endif /* __ASM_IA64_VMX_VIOSAPIC_H__ */
> 
> 
> 

-- 
yamahata

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel