Pretty yukky and unlikely to improve performance on any current workload.
The scary errata around INVLPG on 2MB/4MB pages makes me think we should
just continue to avoid INVLPG for anything other than 4kB regions. Certainly
unless there is a demonstrable measurable performance loss for taking this
conservative approach. Anyway, you're basically turning off the cunning
goodness for all current Intel CPUs. :-)
-- Keir
On 21/8/07 16:25, "Jan Beulich" <jbeulich@xxxxxxxxxx> wrote:
> Folding into a single local handler and a single SMP multiplexor as
> well as adding capability to also flush caches through the same
> interfaces (a subsequent patch will make use of this).
>
> Once at changing cpuinfo_x86, this patch also removes several unused
> fields apparently inherited from Linux.
>
> The changes to xen/include/asm-x86/cpufeature.h apply cleanly only with
> the SVM/EFER patch (which was sent several days ago) applied.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
>
> Index: 2007-08-08/xen/arch/x86/cpu/common.c
> ===================================================================
> --- 2007-08-08.orig/xen/arch/x86/cpu/common.c 2007-08-21 14:15:47.000000000
> +0200
> +++ 2007-08-08/xen/arch/x86/cpu/common.c 2007-08-08 12:03:19.000000000 +0200
> @@ -229,7 +229,6 @@ static void __init early_cpu_detect(void
> void __devinit generic_identify(struct cpuinfo_x86 * c)
> {
> u32 tfms, xlvl;
> - int junk;
>
> if (have_cpuid_p()) {
> /* Get vendor name */
> @@ -244,8 +243,8 @@ void __devinit generic_identify(struct c
>
> /* Intel-defined flags: level 0x00000001 */
> if ( c->cpuid_level >= 0x00000001 ) {
> - u32 capability, excap;
> - cpuid(0x00000001, &tfms, &junk, &excap, &capability);
> + u32 capability, excap, ebx;
> + cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
> c->x86_capability[0] = capability;
> c->x86_capability[4] = excap;
> c->x86 = (tfms >> 8) & 15;
> @@ -255,6 +254,8 @@ void __devinit generic_identify(struct c
> c->x86_model += ((tfms >> 16) & 0xF) << 4;
> }
> c->x86_mask = tfms & 15;
> + if ( cpu_has(c, X86_FEATURE_CLFLSH) )
> + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
> } else {
> /* Have CPUID level 0 only - unheard of */
> c->x86 = 4;
> @@ -272,6 +273,11 @@ void __devinit generic_identify(struct c
> }
> }
>
> +#ifdef CONFIG_X86_64
> + if ( cpu_has(c, X86_FEATURE_PAGE1GB) )
> + __set_bit(3, &c->invlpg_works_ok);
> +#endif
> +
> early_intel_workaround(c);
>
> #ifdef CONFIG_X86_HT
> @@ -313,6 +319,7 @@ void __devinit identify_cpu(struct cpuin
> c->x86_vendor_id[0] = '\0'; /* Unset */
> c->x86_model_id[0] = '\0'; /* Unset */
> c->x86_max_cores = 1;
> + c->x86_clflush_size = 0;
> memset(&c->x86_capability, 0, sizeof c->x86_capability);
>
> if (!have_cpuid_p()) {
> Index: 2007-08-08/xen/arch/x86/cpu/intel.c
> ===================================================================
> --- 2007-08-08.orig/xen/arch/x86/cpu/intel.c 2007-08-21 14:15:47.000000000
> +0200
> +++ 2007-08-08/xen/arch/x86/cpu/intel.c 2007-08-21 12:17:32.000000000 +0200
> @@ -16,8 +16,6 @@
>
> #define select_idle_routine(x) ((void)0)
>
> -extern int trap_init_f00f_bug(void);
> -
> #ifdef CONFIG_X86_INTEL_USERCOPY
> /*
> * Alignment at which movsl is preferred for bulk memory copies.
> @@ -97,25 +95,6 @@ static void __devinit init_intel(struct
> unsigned int l2 = 0;
> char *p = NULL;
>
> -#ifdef CONFIG_X86_F00F_BUG
> - /*
> - * All current models of Pentium and Pentium with MMX technology CPUs
> - * have the F0 0F bug, which lets nonprivileged users lock up the system.
> - * Note that the workaround only should be initialized once...
> - */
> - c->f00f_bug = 0;
> - if ( c->x86 == 5 ) {
> - static int f00f_workaround_enabled = 0;
> -
> - c->f00f_bug = 1;
> - if ( !f00f_workaround_enabled ) {
> - trap_init_f00f_bug();
> - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround
> enabled.\n");
> - f00f_workaround_enabled = 1;
> - }
> - }
> -#endif
> -
> select_idle_routine(c);
> l2 = init_intel_cacheinfo(c);
>
> @@ -123,6 +102,16 @@ static void __devinit init_intel(struct
> if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
> clear_bit(X86_FEATURE_SEP, c->x86_capability);
>
> + if ( /* PentiumPro erratum 30 */
> + (c->x86 == 6 && c->x86_model == 1 && c->x86_mask < 9) ||
> + /* Dual-Core Intel Xeon 3000/5100 series erratum 89/90 */
> + /* Quad-Core Intel Xeon 3200/5300 series erratum 89/88 */
> + /* Intel Core2 erratum 89 */
> + (c->x86 == 6 && c->x86_model == 15 ) ||
> + /* Dual-Core Intel Xeon LV/ULV erratum 75 */
> + (c->x86 == 6 && c->x86_model == 14 ) )
> + __clear_bit(2, &c->invlpg_works_ok);
> +
> /* Names for the Pentium II/Celeron processors
> detectable only by also checking the cache size.
> Dixon is NOT a Celeron. */
> Index: 2007-08-08/xen/arch/x86/flushtlb.c
> ===================================================================
> --- 2007-08-08.orig/xen/arch/x86/flushtlb.c 2007-08-21 14:15:47.000000000
> +0200
> +++ 2007-08-08/xen/arch/x86/flushtlb.c 2007-08-21 14:00:19.000000000 +0200
> @@ -95,26 +95,74 @@ void write_cr3(unsigned long cr3)
> local_irq_restore(flags);
> }
>
> -void local_flush_tlb(void)
> +void flush_one_local(const void *va, unsigned int flags)
> {
> - unsigned long flags;
> - u32 t;
> +    const struct cpuinfo_x86 *c = &current_cpu_data;
> + unsigned int level = flags & FLUSH_LEVEL_MASK;
> + unsigned long irqfl;
>
> - /* This non-reentrant function is sometimes called in interrupt context.
> */
> - local_irq_save(flags);
> -
> - t = pre_flush();
> + if ( !c->x86 )
> + c = &boot_cpu_data;
>
> - hvm_flush_guest_tlbs();
> + /* This non-reentrant function is sometimes called in interrupt context.
> */
> + local_irq_save(irqfl);
>
> -#ifdef USER_MAPPINGS_ARE_GLOBAL
> - __pge_off();
> - __pge_on();
> -#else
> - __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
> + if ( flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL) )
> + {
> + if ( level > 0 &&
> + level < CONFIG_PAGING_LEVELS &&
> + test_bit(level, &c->invlpg_works_ok) )
> + __asm__ __volatile__( "invlpg %0"
> + :
> + : "m" (*(const char *)(va))
> + : "memory" );
> + else
> + {
> + u32 t = pre_flush();
> +
> + hvm_flush_guest_tlbs();
> +
> +#ifndef USER_MAPPINGS_ARE_GLOBAL
> + if ( !(flags & FLUSH_TLB_GLOBAL) ||
> + !(mmu_cr4_features & X86_CR4_PGE) )
> + __asm__ __volatile__ ( "mov %0, %%cr3"
> + :
> + : "r" (read_cr3())
> + : "memory" );
> + else
> #endif
> + {
> + __pge_off();
> + __pge_on();
> + }
> +
> + post_flush(t);
> + }
> + }
> +
> + if ( flags & FLUSH_CACHE )
> + {
> + unsigned long sz;
> +
> + if ( level > 0 && level < CONFIG_PAGING_LEVELS )
> + sz = 1UL << ((level - 1) * PAGETABLE_ORDER);
> + else
> + sz = ULONG_MAX;
> + if ( c->x86_clflush_size > 0 &&
> + c->x86_cache_size > 0 &&
> + sz < (c->x86_cache_size >> (PAGE_SHIFT - 10)) )
> + {
> + unsigned long i;
> +
> + va = (const void *)((unsigned long)va & ~(sz - 1));
> + for ( i = 0; i < sz; i += c->x86_clflush_size )
> + __asm__ __volatile__( "clflush %0"
> + :
> + : "m" (((const char *)va)[i]) );
> + }
> + else
> + wbinvd();
> + }
>
> - post_flush(t);
> -
> - local_irq_restore(flags);
> + local_irq_restore(irqfl);
> }
> Index: 2007-08-08/xen/arch/x86/mm.c
> ===================================================================
> --- 2007-08-08.orig/xen/arch/x86/mm.c 2007-08-21 14:15:47.000000000 +0200
> +++ 2007-08-08/xen/arch/x86/mm.c 2007-08-21 14:18:00.000000000 +0200
> @@ -3497,7 +3497,7 @@ int map_pages_to_xen(
>
> if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
> {
> - local_flush_tlb_pge();
> + flush_one_local((void *)virt, FLUSH_TLB_GLOBAL|2);
> if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
> free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e)));
> }
> @@ -3525,7 +3525,7 @@ int map_pages_to_xen(
> l2e_get_flags(*pl2e) &
> ~_PAGE_PSE));
> l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
> __PAGE_HYPERVISOR));
> - local_flush_tlb_pge();
> + flush_one_local((void *)virt, FLUSH_TLB_GLOBAL|2);
> }
>
> pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
> @@ -3608,7 +3608,7 @@ void destroy_xen_mappings(unsigned long
> }
> }
>
> - flush_tlb_all_pge();
> + flush_global(FLUSH_TLB_GLOBAL);
> }
>
> void __set_fixmap(
> Index: 2007-08-08/xen/arch/x86/setup.c
> ===================================================================
> --- 2007-08-08.orig/xen/arch/x86/setup.c 2007-08-21 14:15:47.000000000 +0200
> +++ 2007-08-08/xen/arch/x86/setup.c 2007-08-21 11:06:23.000000000 +0200
> @@ -114,7 +114,7 @@ struct tss_struct init_tss[NR_CPUS];
>
> char __attribute__ ((__section__(".bss.stack_aligned")))
> cpu0_stack[STACK_SIZE];
>
> -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
> +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, 0x6, 1, -1 };
>
> #if CONFIG_PAGING_LEVELS > 2
> unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
> Index: 2007-08-08/xen/arch/x86/smp.c
> ===================================================================
> --- 2007-08-08.orig/xen/arch/x86/smp.c 2007-08-21 14:15:47.000000000 +0200
> +++ 2007-08-08/xen/arch/x86/smp.c 2007-08-21 12:20:28.000000000 +0200
> @@ -164,7 +164,8 @@ void send_IPI_mask_phys(cpumask_t mask,
>
> static DEFINE_SPINLOCK(flush_lock);
> static cpumask_t flush_cpumask;
> -static unsigned long flush_va;
> +static const void *flush_va;
> +static unsigned int flush_flags;
>
> fastcall void smp_invalidate_interrupt(void)
> {
> @@ -172,26 +173,18 @@ fastcall void smp_invalidate_interrupt(v
> perfc_incr(ipis);
> irq_enter();
> if ( !__sync_lazy_execstate() )
> - {
> - if ( flush_va == FLUSHVA_ALL )
> - local_flush_tlb();
> - else
> - local_flush_tlb_one(flush_va);
> - }
> + flush_one_local(flush_va, flush_flags);
> cpu_clear(smp_processor_id(), flush_cpumask);
> irq_exit();
> }
>
> -void __flush_tlb_mask(cpumask_t mask, unsigned long va)
> +void flush_one_mask(cpumask_t mask, const void *va, unsigned int flags)
> {
> ASSERT(local_irq_is_enabled());
>
> if ( cpu_isset(smp_processor_id(), mask) )
> {
> - if ( va == FLUSHVA_ALL )
> - local_flush_tlb();
> - else
> - local_flush_tlb_one(va);
> + flush_one_local(va, flags);
> cpu_clear(smp_processor_id(), mask);
> }
>
> @@ -200,6 +193,7 @@ void __flush_tlb_mask(cpumask_t mask, un
> spin_lock(&flush_lock);
> flush_cpumask = mask;
> flush_va = va;
> + flush_flags = flags;
> send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
> while ( !cpus_empty(flush_cpumask) )
> cpu_relax();
> @@ -215,24 +209,13 @@ void new_tlbflush_clock_period(void)
> /* Flush everyone else. We definitely flushed just before entry. */
> allbutself = cpu_online_map;
> cpu_clear(smp_processor_id(), allbutself);
> - __flush_tlb_mask(allbutself, FLUSHVA_ALL);
> + flush_mask(allbutself, FLUSH_TLB);
>
> /* No need for atomicity: we are the only possible updater. */
> ASSERT(tlbflush_clock == 0);
> tlbflush_clock++;
> }
>
> -static void flush_tlb_all_pge_ipi(void *info)
> -{
> - local_flush_tlb_pge();
> -}
> -
> -void flush_tlb_all_pge(void)
> -{
> - smp_call_function(flush_tlb_all_pge_ipi, 0, 1, 1);
> - local_flush_tlb_pge();
> -}
> -
> void smp_send_event_check_mask(cpumask_t mask)
> {
> cpu_clear(smp_processor_id(), mask);
> Index: 2007-08-08/xen/arch/x86/x86_32/mm.c
> ===================================================================
> --- 2007-08-08.orig/xen/arch/x86/x86_32/mm.c 2007-08-21 14:15:47.000000000
> +0200
> +++ 2007-08-08/xen/arch/x86/x86_32/mm.c 2007-08-21 09:59:15.000000000 +0200
> @@ -149,7 +149,7 @@ void __init zap_low_mappings(l2_pgentry_
> /* Now zap mappings in the idle pagetables. */
> destroy_xen_mappings(0, HYPERVISOR_VIRT_START);
>
> - flush_tlb_all_pge();
> + flush_global(FLUSH_TLB_GLOBAL);
>
> /* Replace with mapping of the boot trampoline only. */
> map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
> Index: 2007-08-08/xen/arch/x86/x86_64/mm.c
> ===================================================================
> --- 2007-08-08.orig/xen/arch/x86/x86_64/mm.c 2007-08-21 14:15:47.000000000
> +0200
> +++ 2007-08-08/xen/arch/x86/x86_64/mm.c 2007-08-21 12:33:35.000000000 +0200
> @@ -205,7 +205,7 @@ void __init zap_low_mappings(void)
>
> /* Remove aliased mapping of first 1:1 PML4 entry. */
> l4e_write(&idle_pg_table[0], l4e_empty());
> - local_flush_tlb_pge();
> + flush_local(FLUSH_TLB_GLOBAL);
>
> /* Replace with mapping of the boot trampoline only. */
> map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
> Index: 2007-08-08/xen/include/asm-x86/cpufeature.h
> ===================================================================
> --- 2007-08-08.orig/xen/include/asm-x86/cpufeature.h 2007-08-21
> 14:15:47.000000000 +0200
> +++ 2007-08-08/xen/include/asm-x86/cpufeature.h 2007-08-21 12:07:46.000000000
> +0200
> @@ -50,6 +50,7 @@
> #define X86_FEATURE_NX (1*32+20) /* Execute Disable */
> #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
> #define X86_FEATURE_FFXSR (1*32+25) /* FFXSR instruction optimizations */
> +#define X86_FEATURE_PAGE1GB (1*32+26) /* 1Gb large page support */
> #define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
> #define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
> #define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
> @@ -137,6 +138,7 @@
> #define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
> #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH)
> #define cpu_has_ffxsr 0
> +#define cpu_has_page1gb 0
> #else /* __x86_64__ */
> #define cpu_has_vme 0
> #define cpu_has_de 1
> @@ -161,6 +163,7 @@
> #define cpu_has_centaur_mcr 0
> #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH)
> #define cpu_has_ffxsr boot_cpu_has(X86_FEATURE_FFXSR)
> +#define cpu_has_page1gb boot_cpu_has(X86_FEATURE_PAGE1GB)
> #endif
>
> #endif /* __ASM_I386_CPUFEATURE_H */
> Index: 2007-08-08/xen/include/asm-x86/flushtlb.h
> ===================================================================
> --- 2007-08-08.orig/xen/include/asm-x86/flushtlb.h 2007-08-21
> 14:15:47.000000000 +0200
> +++ 2007-08-08/xen/include/asm-x86/flushtlb.h 2007-08-21 12:34:36.000000000
> +0200
> @@ -15,6 +15,29 @@
> #include <xen/smp.h>
> #include <xen/types.h>
>
> +#define FLUSH_LEVEL_MASK 0x0f
> +#define FLUSH_TLB 0x10
> +#define FLUSH_TLB_GLOBAL 0x20
> +#define FLUSH_CACHE 0x40
> +
> +void flush_one_local(const void *va, unsigned int flags);
> +static inline void flush_local(unsigned int flags)
> +{
> + flush_one_local(NULL, flags | FLUSH_LEVEL_MASK);
> +}
> +#ifdef CONFIG_SMP
> +void flush_one_mask(cpumask_t, const void *va, unsigned int flags);
> +static inline void flush_mask(cpumask_t mask, unsigned int flags)
> +{
> + flush_one_mask(mask, NULL, flags | FLUSH_LEVEL_MASK);
> +}
> +#else
> +#define flush_mask(mask, flags) flush_local(flags)
> +#define flush_one_mask(mask, va, flags) flush_one_local(va, flags)
> +#endif
> +#define flush_global(flags) flush_mask(cpu_online_map, flags)
> +#define flush_one_global(va, flags) flush_one_mask(cpu_online_map, va, flags)
> +
> /* The current time as shown by the virtual TLB clock. */
> extern u32 tlbflush_clock;
>
> @@ -72,31 +95,20 @@ static inline unsigned long read_cr3(voi
> extern void write_cr3(unsigned long cr3);
>
> /* Flush guest mappings from the TLB and implicitly tick the tlbflush clock.
> */
> -extern void local_flush_tlb(void);
> -
> -#define local_flush_tlb_pge() \
> - do { \
> - __pge_off(); \
> - local_flush_tlb(); \
> - __pge_on(); \
> - } while ( 0 )
> +#define local_flush_tlb(void) flush_local(FLUSH_TLB)
>
> -#define local_flush_tlb_one(__addr) \
> - __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
> +#define local_flush_tlb_one(v) \
> + flush_one_local((const void *)(v), FLUSH_TLB|1)
>
> #define flush_tlb_all() flush_tlb_mask(cpu_online_map)
>
> #ifndef CONFIG_SMP
> -#define flush_tlb_all_pge() local_flush_tlb_pge()
> #define flush_tlb_mask(mask) local_flush_tlb()
> -#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(_v)
> +#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(v)
> #else
> #include <xen/smp.h>
> -#define FLUSHVA_ALL (~0UL)
> -extern void flush_tlb_all_pge(void);
> -extern void __flush_tlb_mask(cpumask_t mask, unsigned long va);
> -#define flush_tlb_mask(mask) __flush_tlb_mask(mask,FLUSHVA_ALL)
> -#define flush_tlb_one_mask(mask,v) __flush_tlb_mask(mask,(unsigned long)(v))
> +#define flush_tlb_mask(mask) flush_mask(mask, FLUSH_TLB)
> +#define flush_tlb_one_mask(mask,v) flush_one_mask(mask,(const void *)(v),
> FLUSH_TLB|1)
> #endif
>
> #endif /* __FLUSHTLB_H__ */
> Index: 2007-08-08/xen/include/asm-x86/processor.h
> ===================================================================
> --- 2007-08-08.orig/xen/include/asm-x86/processor.h 2007-08-21
> 14:15:47.000000000 +0200
> +++ 2007-08-08/xen/include/asm-x86/processor.h 2007-08-21 11:34:25.000000000
> +0200
> @@ -156,24 +156,20 @@ struct cpuinfo_x86 {
> __u8 x86_vendor; /* CPU vendor */
> __u8 x86_model;
> __u8 x86_mask;
> - char wp_works_ok; /* It doesn't on 386's */
> - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
> + __u8 invlpg_works_ok;
> char hard_math;
> - char rfu;
> int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
> unsigned int x86_capability[NCAPINTS];
> char x86_vendor_id[16];
> char x86_model_id[64];
> int x86_cache_size; /* in KB - valid for CPUS which support this call
> */
> int x86_cache_alignment; /* In bytes */
> - char fdiv_bug;
> - char f00f_bug;
> char coma_bug;
> - char pad0;
> int x86_power;
> unsigned char x86_max_cores; /* cpuid returned max cores value */
> unsigned char booted_cores; /* number of cores as seen by OS */
> unsigned char apicid;
> + unsigned short x86_clflush_size;
> } __cacheline_aligned;
>
> /*
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|