Re: [Xen-ia64-devel] [PATCH] support of 4k page size for individ

On Thu, Aug 16, 2007 at 12:47:06PM +0200, Juergen Gross wrote:
> Hi,

Hi.
I thought the patch would be very small, your patch proved that I was wrong.


> this is the patch needed to support 4k (and 8k) pages for individual guests
> (currently PV only).
> "normal" domU's should not be affected, as the per-vcpu vhpt is reconfigured
> only if a domU uses a page size less than PAGE_SIZE.
> I haven't touched grant pages yet, I think they should work on PAGE_SIZE base
> as before, but I didn't check it.

Althogh grant table and domain save/restore don't seem to be given
a condieration, I have some comments. See below.
Since vcpu_rebuild_vhpt() is triggered at early boot process
before interesting xen-related operations, it won't cause serios issues
in practice though.



> diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/faults.c
> --- a/xen/arch/ia64/xen/faults.c      Thu Aug  9 08:48:00 2007 +0200
> +++ b/xen/arch/ia64/xen/faults.c      Thu Aug 16 11:33:27 2007 +0200
...
> @@ -741,7 +743,8 @@ ia64_shadow_fault(unsigned long ifa, uns
>       pte = vlfe->page_flags;
>       if (vlfe->ti_tag == ia64_ttag(ifa)) {
>               /* The VHPT entry is valid.  */
> -             gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
> +             gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >>
> +                                      v->arch.vhpt_pg_shift);
>               BUG_ON(gpfn == INVALID_M2P_ENTRY);
>       } else {
>               unsigned long itir, iha;
> @@ -757,10 +760,10 @@ ia64_shadow_fault(unsigned long ifa, uns
>               /* Try again!  */
>               if (fault != IA64_NO_FAULT) {
>                       /* This will trigger a dtlb miss.  */
> -                     ia64_ptcl(ifa, PAGE_SHIFT << 2);
> -                     return;
> -             }
> -             gpfn = ((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
> +                     ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
> +                     return;
> +             }
> +             gpfn = ((pte & _PAGE_PPN_MASK) >> v->arch.vhpt_pg_shift);
>               if (pte & _PAGE_D)
>                       pte |= _PAGE_VIRT_D;
>       }
> @@ -788,7 +791,7 @@ ia64_shadow_fault(unsigned long ifa, uns
>                       /* Purge the TC locally.
>                          It will be reloaded from the VHPT iff the
>                          VHPT entry is still valid.  */
> -                     ia64_ptcl(ifa, PAGE_SHIFT << 2);
> +                     ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
>  
>                       atomic64_inc(&d->arch.shadow_fault_count);
>               } else {
> @@ -800,6 +803,6 @@ ia64_shadow_fault(unsigned long ifa, uns
>               /* We don't know wether or not the fault must be
>                  reflected.  The VHPT entry is not valid.  */
>               /* FIXME: in metaphysical mode, we could do an ITC now.  */
> -             ia64_ptcl(ifa, PAGE_SHIFT << 2);
> -     }
> -}
> +             ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
> +     }
> +}

The dirty page bitmap is allocated in PAGE_SIZE unit and it's the dirty
page logging ABI at this moment.
So we should stay with PAGE_SIZE or revise the ABI.
If we sould stay with PAGE_SIZE, the logging unit might be
coarser than what you want.
If we would revise the ABI, what if vcpu_rebuild_vhpt() during
logging dirty pages? (Off course we need to revise the tool stack too.)


> diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/vhpt.c
> --- a/xen/arch/ia64/xen/vhpt.c        Thu Aug  9 08:48:00 2007 +0200
> +++ b/xen/arch/ia64/xen/vhpt.c        Thu Aug 16 11:33:27 2007 +0200
...
> @@ -291,6 +292,7 @@ __flush_vhpt_range(unsigned long vhpt_ma
>  __flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
>  {
>       void *vhpt_base = __va(vhpt_maddr);
> +     u64 pgsz = 1L << current->arch.vhpt_pg_shift;
>  
>       while ((long)addr_range > 0) {
>               /* Get the VHPT entry.  */
> @@ -298,8 +300,8 @@ __flush_vhpt_range(unsigned long vhpt_ma
>                       __va_ul(vcpu_vhpt_maddr(current));
>               struct vhpt_lf_entry *v = vhpt_base + off;
>               v->ti_tag = INVALID_TI_TAG;
> -             addr_range -= PAGE_SIZE;
> -             vadr += PAGE_SIZE;
> +             addr_range -= pgsz;
> +             vadr += pgsz;
>       }
>  }
>  
> @@ -362,7 +364,8 @@ void domain_flush_vtlb_range (struct dom
>       // ptc.ga has release semantics.
>  
>       /* ptc.ga  */
> -     platform_global_tlb_purge(vadr, vadr + addr_range, PAGE_SHIFT);
> +     platform_global_tlb_purge(vadr, vadr + addr_range,
> +                               current->arch.vhpt_pg_shift);
>       perfc_incr(domain_flush_vtlb_range);
>  }
>  
> @@ -381,6 +384,7 @@ __domain_flush_vtlb_track_entry(struct d
>       int cpu;
>       int vcpu;
>       int local_purge = 1;
> +     unsigned char ps = current->arch.vhpt_pg_shift;
>       
>       BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
>       /*
> @@ -413,7 +417,7 @@ __domain_flush_vtlb_track_entry(struct d
>                               continue;
>  
>                       /* Invalidate VHPT entries.  */
> -                     vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);
> +                     vcpu_flush_vhpt_range(v, vaddr, 1L << ps);
>  
>                       /*
>                        * current->processor == v->processor
> @@ -427,7 +431,7 @@ __domain_flush_vtlb_track_entry(struct d
>       } else {
>               for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
>                       /* Invalidate VHPT entries.  */
> -                     cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);
> +                     cpu_flush_vhpt_range(cpu, vaddr, 1L << ps);
>  
>                       if (d->vcpu[cpu] != current)
>                               local_purge = 0;
> @@ -436,12 +440,11 @@ __domain_flush_vtlb_track_entry(struct d
>  
>       /* ptc.ga  */
>       if (local_purge) {
> -             ia64_ptcl(vaddr, PAGE_SHIFT << 2);
> +             ia64_ptcl(vaddr, ps << 2);
>               perfc_incr(domain_flush_vtlb_local);
>       } else {
>               /* ptc.ga has release semantics. */
> -             platform_global_tlb_purge(vaddr, vaddr + PAGE_SIZE,
> -                                       PAGE_SHIFT);
> +             platform_global_tlb_purge(vaddr, vaddr + (1L << ps), ps);
>               perfc_incr(domain_flush_vtlb_global);
>       }
>  

__domain_flush_vtlb_track_entry() is for tlb insert tracking.
It is tightly coupled with the p2m table
(i.e. it is tightly coupled with PAGE_SIZE) and it tracks tlb insert
in PAGE_SIZE unit.
Please notice vaddr &= PAGE_MASK in __vcpu_tlb_track_insert_or_dirty().
So the tlb flushing part must flush in PAGE_SIZE.

The tlb tracking is for a domain which maps the foreign domain page
(i.e. usually a driver domain), so just staying PAGE_SIZE would
be the easiest way here, I suppose.
However if your guest OS maps foreign domains pages, things will change.
I don't think it does because you postponed the grant tables issues, right?

thanks,
-- 
yamahata

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
WARNING - OLD ARCHIVES

xen-ia64-devel

Re: [Xen-ia64-devel] [PATCH] support of 4k page size for individual gues