Is there any measurable performance benefit to doing this?
-- Keir
On 24/1/08 15:04, "Jan Beulich" <jbeulich@xxxxxxxxxx> wrote:
> At once adjust the 2/4Mb page handling slightly in a few places (to
> match the newly added code):
> - when re-creating a large page mapping after finding that all small
> page mappings in the respective area are using identical flags and
> suitable MFNs, the virtual address was already incremented past the
> area to be dealt with, which needs to be accounted for in the
> invocation of flush_area() in that path
> - don't or-in/and-out _PAGE_PSE on non-present pages
> - when comparing flags, try to minimise the number of l1f_to_lNf()/
> lNf_to_l1f() instances used (see the stand-alone illustration below
> the sign-off)
> - instead of skipping a single page when encountering a big page
> mapping equal to what a small page mapping would establish, skip
> to the next larger page boundary (a sketch of this computation
> follows right after this list)
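
For illustration only, here is a minimal stand-alone sketch of that
boundary-skip arithmetic. The helper name and sample values are invented;
the real patch performs the same computation in place on mfn/nr_mfns, as
in the hunks further down.

    /* Sketch: how many 4k pages can be covered in one step when the
     * existing superpage mapping already matches?  Everything up to the
     * next 2Mb boundary, capped by the number of MFNs still left to map.
     * Constants mirror x86-64 but are redefined so this compiles alone. */
    #include <stdio.h>

    #define PAGE_SHIFT         12
    #define L2_PAGETABLE_SHIFT 21   /* 2Mb superpage */

    static unsigned long pages_to_skip(unsigned long mfn, unsigned long nr_mfns)
    {
        unsigned long i = (1UL << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) -
                          (mfn & ((1UL << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1));

        return i > nr_mfns ? nr_mfns : i;
    }

    int main(void)
    {
        printf("%lu\n", pages_to_skip(3, 1000)); /* 509: up to the 2Mb boundary */
        printf("%lu\n", pages_to_skip(3, 10));   /* 10: capped by nr_mfns       */
        return 0;
    }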
>
> This patch won't apply cleanly without the previously sent patch
> adjusting show_page_walk().
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
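
As a side note, a self-contained illustration of the reworked
flag-conversion macros (second and third bullets above): _PAGE_PSE is only
or'ed in or masked out when the entry is present. The flag values below
are the conventional x86 bit positions, repeated here purely so the
snippet builds on its own.

    #include <stdio.h>

    #define _PAGE_PRESENT 0x001U
    #define _PAGE_RW      0x002U
    #define _PAGE_PSE     0x080U

    /* Same shape as the macros in the mm.c hunk below. */
    #define l1f_to_lNf(f) ((f) & _PAGE_PRESENT ? (f) |  _PAGE_PSE : (f))
    #define lNf_to_l1f(f) ((f) & _PAGE_PRESENT ? (f) & ~_PAGE_PSE : (f))

    int main(void)
    {
        unsigned int present = _PAGE_PRESENT | _PAGE_RW;
        unsigned int absent  = _PAGE_RW;          /* not-present entry */

        printf("%#x -> %#x\n", present, l1f_to_lNf(present)); /* 0x3 -> 0x83 */
        printf("%#x -> %#x\n", absent,  l1f_to_lNf(absent));  /* 0x2 -> 0x2  */
        printf("%#x -> %#x\n", 0x83U,   lNf_to_l1f(0x83U));   /* 0x83 -> 0x3 */
        return 0;
    }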
>
> Index: 2008-01-18/xen/arch/x86/mm.c
> ===================================================================
> --- 2008-01-18.orig/xen/arch/x86/mm.c 2008-01-23 15:39:18.000000000 +0100
> +++ 2008-01-18/xen/arch/x86/mm.c 2008-01-23 16:22:01.000000000 +0100
> @@ -113,6 +113,8 @@
> #include <xsm/xsm.h>
> #include <xen/trace.h>
>
> +extern int early_boot;
> +
> #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
>
> /*
> @@ -3659,7 +3661,13 @@ int ptwr_do_page_fault(struct vcpu *v, u
>
> void free_xen_pagetable(void *v)
> {
> - extern int early_boot;
> +#ifdef __x86_64__
> + unsigned long ma = virt_to_maddr(v);
> + unsigned long l2_ident_ma = virt_to_maddr(l2_identmap);
> +
> + if ( ma >= l2_ident_ma && ma < l2_ident_ma + sizeof(l2_identmap) )
> + return;
> +#endif
>
> BUG_ON(early_boot);
>
> @@ -3670,8 +3678,8 @@ void free_xen_pagetable(void *v)
> }
>
> /* Convert to from superpage-mapping flags for map_pages_to_xen(). */
> -#define l1f_to_l2f(f) ((f) | _PAGE_PSE)
> -#define l2f_to_l1f(f) ((f) & ~_PAGE_PSE)
> +#define l1f_to_lNf(f) ((f) & _PAGE_PRESENT ? (f) | _PAGE_PSE : (f))
> +#define lNf_to_l1f(f) ((f) & _PAGE_PRESENT ? (f) & ~_PAGE_PSE : (f))
>
> /*
> * map_pages_to_xen() can be called with interrupts disabled:
> @@ -3697,6 +3705,126 @@ int map_pages_to_xen(
>
> while ( nr_mfns != 0 )
> {
> +#ifdef __x86_64__
> + l3_pgentry_t *pl3e = virt_to_xen_l3e(virt);
> + l3_pgentry_t ol3e = *pl3e;
> +
> + if ( cpu_has_page1gb &&
> + !(((virt >> PAGE_SHIFT) | mfn) &
> + ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)) &&
> + nr_mfns >= (1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) &&
> + !(flags & (_PAGE_PAT | MAP_SMALL_PAGES)) )
> + {
> + /* 1Gb-page mapping. */
> + l3e_write_atomic(pl3e, l3e_from_pfn(mfn, l1f_to_lNf(flags)));
> +
> + if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) )
> + {
> + unsigned int flush_flags =
> + FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER);
> +
> + if ( l3e_get_flags(ol3e) & _PAGE_PSE )
> + {
> + if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
> + flush_flags |= FLUSH_TLB_GLOBAL;
> + if ( (l1f_to_lNf(l3e_get_flags(ol3e)) ^ flags) &
> + PAGE_CACHE_ATTRS )
> + flush_flags |= FLUSH_CACHE;
> + flush_area(virt, flush_flags);
> + }
> + else
> + {
> + pl2e = l3e_to_l2e(ol3e);
> + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
> + {
> + ol2e = pl2e[i];
> + if ( !(l2e_get_flags(ol2e) & _PAGE_PRESENT) )
> + continue;
> + if ( l2e_get_flags(ol2e) & _PAGE_PSE )
> + {
> + if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL )
> + flush_flags |= FLUSH_TLB_GLOBAL;
> + if ( (lNf_to_l1f(l2e_get_flags(ol2e)) ^ flags) &
> + PAGE_CACHE_ATTRS )
> + flush_flags |= FLUSH_CACHE;
> + }
> + else
> + {
> + unsigned int j;
> +
> + pl1e = l2e_to_l1e(ol2e);
> + for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ )
> + {
> + ol1e = pl1e[j];
> + if ( l1e_get_flags(ol1e) & _PAGE_GLOBAL )
> + flush_flags |= FLUSH_TLB_GLOBAL;
> + if ( (l1e_get_flags(ol1e) ^ flags) &
> + PAGE_CACHE_ATTRS )
> + flush_flags |= FLUSH_CACHE;
> + }
> + }
> + }
> + flush_area(virt, flush_flags);
> + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
> + {
> + ol2e = pl2e[i];
> + if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) &&
> + !(l2e_get_flags(ol2e) & _PAGE_PSE) )
> + free_xen_pagetable(l2e_to_l1e(ol2e));
> + }
> + free_xen_pagetable(pl2e);
> + }
> + }
> +
> + virt += 1UL << L3_PAGETABLE_SHIFT;
> + mfn += 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
> + nr_mfns -= 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
> + continue;
> + }
> +
> + if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) &&
> + (l3e_get_flags(ol3e) & _PAGE_PSE) )
> + {
> + unsigned int flush_flags =
> + FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER);
> +
> + /* Skip this PTE if there is no change. */
> + if ( ((l3e_get_pfn(ol3e) & ~(L2_PAGETABLE_ENTRIES *
> + L1_PAGETABLE_ENTRIES - 1)) +
> + (l2_table_offset(virt) << PAGETABLE_ORDER) +
> + l1_table_offset(virt) == mfn) &&
> + ((lNf_to_l1f(l3e_get_flags(ol3e)) ^ flags) &
> + ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0 )
> + {
> + i = (1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) -
> + (mfn & ((1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1));
> + if ( i > nr_mfns )
> + i = nr_mfns;
> + virt += i << PAGE_SHIFT;
> + mfn += i;
> + nr_mfns -= i;
> + continue;
> + }
> +
> + pl2e = alloc_xen_pagetable();
> + if ( pl2e == NULL )
> + return -ENOMEM;
> +
> + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
> + l2e_write(pl2e + i,
> + l2e_from_pfn(l3e_get_pfn(ol3e) +
> + (i << PAGETABLE_ORDER),
> + l3e_get_flags(ol3e)));
> +
> + if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
> + flush_flags |= FLUSH_TLB_GLOBAL;
> +
> + l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e),
> + __PAGE_HYPERVISOR));
> + flush_area(virt, flush_flags);
> + }
> +#endif
> +
> pl2e = virt_to_xen_l2e(virt);
>
> if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
> @@ -3705,7 +3833,7 @@ int map_pages_to_xen(
> {
> /* Super-page mapping. */
> ol2e = *pl2e;
> - l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_l2f(flags)));
> + l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_lNf(flags)));
>
> if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
> {
> @@ -3716,8 +3844,8 @@ int map_pages_to_xen(
> {
> if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL )
> flush_flags |= FLUSH_TLB_GLOBAL;
> - if ( (l2e_get_flags(ol2e) ^ l1f_to_l2f(flags)) &
> - l1f_to_l2f(PAGE_CACHE_ATTRS) )
> + if ( (lNf_to_l1f(l2e_get_flags(ol2e)) ^ flags) &
> + PAGE_CACHE_ATTRS )
> flush_flags |= FLUSH_CACHE;
> flush_area(virt, flush_flags);
> }
> @@ -3761,13 +3889,17 @@ int map_pages_to_xen(
> /* Skip this PTE if there is no change. */
> if ( (((l2e_get_pfn(*pl2e) & ~(L1_PAGETABLE_ENTRIES - 1)) +
> l1_table_offset(virt)) == mfn) &&
> - (((l2f_to_l1f(l2e_get_flags(*pl2e)) ^ flags) &
> + (((lNf_to_l1f(l2e_get_flags(*pl2e)) ^ flags) &
> ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0) )
> {
> - virt += 1UL << L1_PAGETABLE_SHIFT;
> - mfn += 1UL;
> - nr_mfns -= 1UL;
> - continue;
> + i = (1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) -
> +                    (mfn & ((1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1));
> + if ( i > nr_mfns )
> + i = nr_mfns;
> + virt += i << L1_PAGETABLE_SHIFT;
> + mfn += i;
> + nr_mfns -= i;
> + goto check_l3;
> }
>
> pl1e = alloc_xen_pagetable();
> @@ -3777,7 +3909,7 @@ int map_pages_to_xen(
> for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
> l1e_write(&pl1e[i],
> l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
> -                                   l2f_to_l1f(l2e_get_flags(*pl2e))));
> +                                   lNf_to_l1f(l2e_get_flags(*pl2e))));
>
> if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL )
> flush_flags |= FLUSH_TLB_GLOBAL;
> @@ -3820,13 +3952,43 @@ int map_pages_to_xen(
> {
> ol2e = *pl2e;
> l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn,
> - l1f_to_l2f(flags)));
> -                flush_area(virt, (FLUSH_TLB_GLOBAL |
> -                                  FLUSH_ORDER(PAGETABLE_ORDER)));
> + l1f_to_lNf(flags)));
> + flush_area(virt - PAGE_SIZE,
> +                       FLUSH_TLB_GLOBAL | FLUSH_ORDER(PAGETABLE_ORDER));
> free_xen_pagetable(l2e_to_l1e(ol2e));
> }
> }
> }
> +
> + check_l3: ;
> +#ifdef __x86_64__
> + if ( cpu_has_page1gb &&
> + !early_boot &&
> + flags == PAGE_HYPERVISOR &&
> + (nr_mfns == 0 ||
> + !(((virt >> PAGE_SHIFT) | mfn) &
> + ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))) )
> + {
> + unsigned long base_mfn;
> +
> + ol3e = *pl3e;
> + pl2e = l3e_to_l2e(ol3e);
> + base_mfn = l2e_get_pfn(*pl2e) & ~(L2_PAGETABLE_ENTRIES *
> + L1_PAGETABLE_ENTRIES - 1);
> + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
> +            if ( l2e_get_pfn(*pl2e) != base_mfn + (i << PAGETABLE_ORDER) ||
> + l2e_get_flags(*pl2e) != l1f_to_lNf(flags) )
> + break;
> + if ( i == L2_PAGETABLE_ENTRIES )
> + {
> + l3e_write_atomic(pl3e, l3e_from_pfn(base_mfn,
> + l1f_to_lNf(flags)));
> + flush_area(virt - PAGE_SIZE,
> +                       FLUSH_TLB_GLOBAL | FLUSH_ORDER(2*PAGETABLE_ORDER));
> + free_xen_pagetable(l3e_to_l2e(ol3e));
> + }
> + }
> +#endif
> }
>
> return 0;
> @@ -3844,6 +4006,40 @@ void destroy_xen_mappings(unsigned long
>
> while ( v < e )
> {
> +#ifdef __x86_64__
> + l3_pgentry_t *pl3e = virt_to_xen_l3e(v);
> +
> + if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
> + {
> + v += 1UL << L3_PAGETABLE_SHIFT;
> + v &= ~((1UL << L3_PAGETABLE_SHIFT) - 1);
> + continue;
> + }
> +
> + if ( l3e_get_flags(*pl3e) & _PAGE_PSE )
> + {
> + if ( l2_table_offset(v) == 0 &&
> + l1_table_offset(v) == 0 &&
> + ((e - v) >= (1UL << L3_PAGETABLE_SHIFT)) )
> + {
> + /* PAGE1GB: whole superpage is destroyed. */
> + l3e_write_atomic(pl3e, l3e_empty());
> + v += 1UL << L3_PAGETABLE_SHIFT;
> + continue;
> + }
> +
> + /* PAGE1GB: shatter the superpage and fall through. */
> + pl2e = alloc_xen_pagetable();
> + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
> + l2e_write(pl2e + i,
> + l2e_from_pfn(l3e_get_pfn(*pl3e) +
> + (i << PAGETABLE_ORDER),
> + l3e_get_flags(*pl3e)));
> + l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e),
> + __PAGE_HYPERVISOR));
> + }
> +#endif
> +
> pl2e = virt_to_xen_l2e(v);
>
> if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
> @@ -3896,6 +4092,23 @@ void destroy_xen_mappings(unsigned long
> free_xen_pagetable(pl1e);
> }
> }
> +
> +#ifdef __x86_64__
> + /* If we are done with the L3E, check if it is now empty. */
> + if ( (v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0) )
> + continue;
> + pl2e = l3e_to_l2e(*pl3e);
> + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
> + if ( l2e_get_intpte(pl2e[i]) != 0 )
> + break;
> + if ( i == L2_PAGETABLE_ENTRIES )
> + {
> + /* Empty: zap the L3E and free the L2 page. */
> + l3e_write_atomic(pl3e, l3e_empty());
> + flush_all(FLUSH_TLB_GLOBAL); /* flush before free */
> + free_xen_pagetable(pl2e);
> + }
> +#endif
> }
>
> flush_all(FLUSH_TLB_GLOBAL);
> Index: 2008-01-18/xen/arch/x86/setup.c
> ===================================================================
> --- 2008-01-18.orig/xen/arch/x86/setup.c 2008-01-23 15:39:18.000000000 +0100
> +++ 2008-01-18/xen/arch/x86/setup.c 2008-01-23 16:51:48.000000000 +0100
> @@ -672,8 +672,9 @@ void __init __start_xen(unsigned long mb
> pl3e = l4e_to_l3e(*pl4e);
> for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
> {
> - /* Not present or already relocated? */
> + /* Not present, 1Gb mapping, or already relocated? */
> if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
> + (l3e_get_flags(*pl3e) & _PAGE_PSE) ||
> (l3e_get_pfn(*pl3e) > 0x1000) )
> continue;
> *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
> Index: 2008-01-18/xen/arch/x86/x86_64/mm.c
> ===================================================================
> --- 2008-01-18.orig/xen/arch/x86/x86_64/mm.c 2008-01-23 15:39:18.000000000 +0100
> +++ 2008-01-18/xen/arch/x86/x86_64/mm.c 2008-01-23 11:56:42.000000000 +0100
> @@ -70,30 +70,36 @@ void *alloc_xen_pagetable(void)
> return mfn_to_virt(mfn);
> }
>
> -l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
> +l3_pgentry_t *virt_to_xen_l3e(unsigned long v)
> {
> l4_pgentry_t *pl4e;
> - l3_pgentry_t *pl3e;
> - l2_pgentry_t *pl2e;
>
> pl4e = &idle_pg_table[l4_table_offset(v)];
> if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
> {
> - pl3e = alloc_xen_pagetable();
> + l3_pgentry_t *pl3e = alloc_xen_pagetable();
> +
> clear_page(pl3e);
> l4e_write(pl4e, l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR));
> }
>
> - pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v);
> + return l4e_to_l3e(*pl4e) + l3_table_offset(v);
> +}
> +
> +l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
> +{
> + l3_pgentry_t *pl3e;
> +
> + pl3e = virt_to_xen_l3e(v);
> if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
> {
> - pl2e = alloc_xen_pagetable();
> + l2_pgentry_t *pl2e = alloc_xen_pagetable();
> +
> clear_page(pl2e);
> l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR));
> }
>
> - pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v);
> - return pl2e;
> + return l3e_to_l2e(*pl3e) + l2_table_offset(v);
> }
>
> void __init paging_init(void)
> Index: 2008-01-18/xen/arch/x86/x86_64/traps.c
> ===================================================================
> --- 2008-01-18.orig/xen/arch/x86/x86_64/traps.c 2008-01-23 15:39:18.000000000 +0100
> +++ 2008-01-18/xen/arch/x86/x86_64/traps.c 2008-01-23 11:58:58.000000000 +0100
> @@ -148,9 +148,11 @@ void show_page_walk(unsigned long addr)
> mfn = l3e_get_pfn(l3e);
> pfn = mfn_valid(mfn) && mpt_valid ?
> get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
> - printk(" L3[0x%03lx] = %"PRIpte" %016lx\n",
> - l3_table_offset(addr), l3e_get_intpte(l3e), pfn);
> - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
> + printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n",
> + l3_table_offset(addr), l3e_get_intpte(l3e), pfn,
> + (l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : "");
> + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ||
> + (l3e_get_flags(l3e) & _PAGE_PSE) )
> return;
>
> l2t = mfn_to_virt(mfn);
> Index: 2008-01-18/xen/include/asm-x86/page.h
> ===================================================================
> --- 2008-01-18.orig/xen/include/asm-x86/page.h 2008-01-23 15:39:18.000000000 +0100
> +++ 2008-01-18/xen/include/asm-x86/page.h 2008-01-22 15:35:32.000000000 +0100
> @@ -350,6 +350,9 @@ static inline int get_order_from_pages(u
> void *alloc_xen_pagetable(void);
> void free_xen_pagetable(void *v);
> l2_pgentry_t *virt_to_xen_l2e(unsigned long v);
> +#ifdef __x86_64__
> +l3_pgentry_t *virt_to_xen_l3e(unsigned long v);
> +#endif
>
> /* Map machine page range in Xen virtual address space. */
> #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages for the mapping */
> Index: 2008-01-18/xen/include/asm-x86/x86_64/page.h
> ===================================================================
> --- 2008-01-18.orig/xen/include/asm-x86/x86_64/page.h 2008-01-23 15:39:18.000000000 +0100
> +++ 2008-01-18/xen/include/asm-x86/x86_64/page.h 2008-01-23 11:14:54.000000000 +0100
> @@ -59,6 +59,8 @@ typedef struct { intpte_t l3; } l3_pgent
> typedef struct { intpte_t l4; } l4_pgentry_t;
> typedef l4_pgentry_t root_pgentry_t;
>
> +extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES];
> +
> #endif /* !__ASSEMBLY__ */
>
> #define pte_read_atomic(ptep) (*(ptep))
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel