
To: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>
Subject: Re: [Xen-ia64-devel] [PATCH] support of 4k page size for individual guests
From: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
Date: Fri, 17 Aug 2007 12:00:06 +0900
Cc: xen-ia64-devel <xen-ia64-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Thu, 16 Aug 2007 20:00:25 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
In-reply-to: <46C42B2A.20505@xxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-ia64-devel-request@lists.xensource.com?subject=help>
List-id: Discussion of the ia64 port of Xen <xen-ia64-devel.lists.xensource.com>
List-post: <mailto:xen-ia64-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ia64-devel>, <mailto:xen-ia64-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ia64-devel>, <mailto:xen-ia64-devel-request@lists.xensource.com?subject=unsubscribe>
References: <46C42B2A.20505@xxxxxxxxxxxxxxxxxxx>
Sender: xen-ia64-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.4.2.1i
On Thu, Aug 16, 2007 at 12:47:06PM +0200, Juergen Gross wrote:
> Hi,

Hi.
I thought the patch would be very small; your patch proved me wrong.


> this is the patch needed to support 4k (and 8k) pages for individual guests
> (currently PV only).
> "normal" domU's should not be affected, as the per-vcpu vhpt is reconfigured
> only if a domU uses a page size less than PAGE_SIZE.
> I haven't touched grant pages yet, I think they should work on PAGE_SIZE base
> as before, but I didn't check it.

Although the grant table and domain save/restore don't seem to have been
given consideration, I have some comments. See below.
Since vcpu_rebuild_vhpt() is triggered early in the boot process, before
any interesting Xen-related operations, it won't cause serious issues
in practice, though.
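
For reference, my reading of the reconfiguration you describe boils down
to something like the sketch below. This is only an illustration of the
quoted description, not code from your patch; the maybe_rebuild_vhpt()
wrapper and the vcpu_rebuild_vhpt() signature are guesses and may differ.

    /* Hedged sketch: "normal" domUs keep the PAGE_SIZE-based per-vcpu
     * VHPT, and only a guest asking for a smaller page size gets its
     * VHPT rebuilt. */
    static void maybe_rebuild_vhpt(struct vcpu *v, unsigned int guest_pg_shift)
    {
        if (guest_pg_shift >= PAGE_SHIFT)
            return;                               /* normal domU: nothing to do */
        if (guest_pg_shift != v->arch.vhpt_pg_shift)
            vcpu_rebuild_vhpt(v, guest_pg_shift); /* signature assumed */
    }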



> diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/faults.c
> --- a/xen/arch/ia64/xen/faults.c      Thu Aug  9 08:48:00 2007 +0200
> +++ b/xen/arch/ia64/xen/faults.c      Thu Aug 16 11:33:27 2007 +0200
...
> @@ -741,7 +743,8 @@ ia64_shadow_fault(unsigned long ifa, uns
>       pte = vlfe->page_flags;
>       if (vlfe->ti_tag == ia64_ttag(ifa)) {
>               /* The VHPT entry is valid.  */
> -             gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
> +             gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >>
> +                                      v->arch.vhpt_pg_shift);
>               BUG_ON(gpfn == INVALID_M2P_ENTRY);
>       } else {
>               unsigned long itir, iha;
> @@ -757,10 +760,10 @@ ia64_shadow_fault(unsigned long ifa, uns
>               /* Try again!  */
>               if (fault != IA64_NO_FAULT) {
>                       /* This will trigger a dtlb miss.  */
> -                     ia64_ptcl(ifa, PAGE_SHIFT << 2);
> -                     return;
> -             }
> -             gpfn = ((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
> +                     ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
> +                     return;
> +             }
> +             gpfn = ((pte & _PAGE_PPN_MASK) >> v->arch.vhpt_pg_shift);
>               if (pte & _PAGE_D)
>                       pte |= _PAGE_VIRT_D;
>       }
> @@ -788,7 +791,7 @@ ia64_shadow_fault(unsigned long ifa, uns
>                       /* Purge the TC locally.
>                          It will be reloaded from the VHPT iff the
>                          VHPT entry is still valid.  */
> -                     ia64_ptcl(ifa, PAGE_SHIFT << 2);
> +                     ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
>  
>                       atomic64_inc(&d->arch.shadow_fault_count);
>               } else {
> @@ -800,6 +803,6 @@ ia64_shadow_fault(unsigned long ifa, uns
>               /* We don't know wether or not the fault must be
>                  reflected.  The VHPT entry is not valid.  */
>               /* FIXME: in metaphysical mode, we could do an ITC now.  */
> -             ia64_ptcl(ifa, PAGE_SHIFT << 2);
> -     }
> -}
> +             ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
> +     }
> +}

The dirty page bitmap is allocated in PAGE_SIZE units, and that is the
dirty page logging ABI at the moment.
So we should either stay with PAGE_SIZE or revise the ABI.
If we stay with PAGE_SIZE, the logging unit might be coarser than what
you want.
If we revise the ABI, what happens if vcpu_rebuild_vhpt() is called while
dirty pages are being logged? (Of course we would need to revise the
tool stack too.)
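
To make the granularity point concrete, here is a small standalone
illustration (not Xen code; PAGE_SHIFT = 14 is assumed for the usual 16K
Xen/ia64 page): with a 4k guest page size, four guest pages collapse
onto a single bit of a PAGE_SIZE-based dirty bitmap.

    #include <stdio.h>

    #define PAGE_SHIFT 14                     /* assumed 16K Xen page */

    int main(void)
    {
        unsigned long vhpt_pg_shift = 12;     /* 4k guest page */
        unsigned long gpaddr;

        for (gpaddr = 0; gpaddr < (1UL << PAGE_SHIFT);
             gpaddr += 1UL << vhpt_pg_shift)
            printf("guest page at 0x%05lx -> dirty bit %lu\n",
                   gpaddr, gpaddr >> PAGE_SHIFT);
        /* All four 4k pages report bit 0, so the tools only ever see
         * dirtiness at PAGE_SIZE granularity under the current ABI. */
        return 0;
    }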


> diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/vhpt.c
> --- a/xen/arch/ia64/xen/vhpt.c        Thu Aug  9 08:48:00 2007 +0200
> +++ b/xen/arch/ia64/xen/vhpt.c        Thu Aug 16 11:33:27 2007 +0200
...
> @@ -291,6 +292,7 @@ __flush_vhpt_range(unsigned long vhpt_ma
>  __flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
>  {
>       void *vhpt_base = __va(vhpt_maddr);
> +     u64 pgsz = 1L << current->arch.vhpt_pg_shift;
>  
>       while ((long)addr_range > 0) {
>               /* Get the VHPT entry.  */
> @@ -298,8 +300,8 @@ __flush_vhpt_range(unsigned long vhpt_ma
>                       __va_ul(vcpu_vhpt_maddr(current));
>               struct vhpt_lf_entry *v = vhpt_base + off;
>               v->ti_tag = INVALID_TI_TAG;
> -             addr_range -= PAGE_SIZE;
> -             vadr += PAGE_SIZE;
> +             addr_range -= pgsz;
> +             vadr += pgsz;
>       }
>  }
>  
> @@ -362,7 +364,8 @@ void domain_flush_vtlb_range (struct dom
>       // ptc.ga has release semantics.
>  
>       /* ptc.ga  */
> -     platform_global_tlb_purge(vadr, vadr + addr_range, PAGE_SHIFT);
> +     platform_global_tlb_purge(vadr, vadr + addr_range,
> +                               current->arch.vhpt_pg_shift);
>       perfc_incr(domain_flush_vtlb_range);
>  }
>  
> @@ -381,6 +384,7 @@ __domain_flush_vtlb_track_entry(struct d
>       int cpu;
>       int vcpu;
>       int local_purge = 1;
> +     unsigned char ps = current->arch.vhpt_pg_shift;
>       
>       BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
>       /*
> @@ -413,7 +417,7 @@ __domain_flush_vtlb_track_entry(struct d
>                               continue;
>  
>                       /* Invalidate VHPT entries.  */
> -                     vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);
> +                     vcpu_flush_vhpt_range(v, vaddr, 1L << ps);
>  
>                       /*
>                        * current->processor == v->processor
> @@ -427,7 +431,7 @@ __domain_flush_vtlb_track_entry(struct d
>       } else {
>               for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
>                       /* Invalidate VHPT entries.  */
> -                     cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);
> +                     cpu_flush_vhpt_range(cpu, vaddr, 1L << ps);
>  
>                       if (d->vcpu[cpu] != current)
>                               local_purge = 0;
> @@ -436,12 +440,11 @@ __domain_flush_vtlb_track_entry(struct d
>  
>       /* ptc.ga  */
>       if (local_purge) {
> -             ia64_ptcl(vaddr, PAGE_SHIFT << 2);
> +             ia64_ptcl(vaddr, ps << 2);
>               perfc_incr(domain_flush_vtlb_local);
>       } else {
>               /* ptc.ga has release semantics. */
> -             platform_global_tlb_purge(vaddr, vaddr + PAGE_SIZE,
> -                                       PAGE_SHIFT);
> +             platform_global_tlb_purge(vaddr, vaddr + (1L << ps), ps);
>               perfc_incr(domain_flush_vtlb_global);
>       }
>  

__domain_flush_vtlb_track_entry() is for TLB insert tracking.
It is tightly coupled with the p2m table (i.e. with PAGE_SIZE) and it
tracks TLB inserts in PAGE_SIZE units.
Please note the vaddr &= PAGE_MASK in __vcpu_tlb_track_insert_or_dirty().
So the TLB flushing part must flush in PAGE_SIZE units; a small
illustration follows below.
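
Again a standalone illustration, not Xen code, and the addresses are
made up: because the tracker stores the PAGE_MASK-aligned address, a
flush of only (1UL << ps) bytes covers just the first small page of the
tracked frame and can miss the mapping that was actually inserted.

    #include <stdio.h>

    #define PAGE_SHIFT 14                      /* assumed 16K Xen page */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long ps = 12;                         /* 4k guest page */
        unsigned long inserted = 0x200003000UL;        /* made-up guest vaddr */
        unsigned long tracked  = inserted & PAGE_MASK; /* what the tracker keeps */

        /* A flush of (1UL << ps) bytes from 'tracked' ends well before
         * the mapping that was actually inserted. */
        printf("tracked 0x%lx, small flush covers up to 0x%lx, "
               "inserted mapping at 0x%lx\n",
               tracked, tracked + (1UL << ps), inserted);
        return 0;
    }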

The TLB tracking is for a domain which maps foreign domain pages
(i.e. usually a driver domain), so just staying with PAGE_SIZE would be
the easiest way here, I suppose.
However, if your guest OS maps foreign domain pages, things will change.
I don't think it does, because you postponed the grant table issues,
right?

thanks,
-- 
yamahata

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel