[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH for-4.21 1/2] x86/AMD: avoid REP MOVSB for Zen3/4



Le 25/09/2025 à 12:48, Jan Beulich a écrit :
> Along with Zen2 (which doesn't expose ERMS), both families reportedly
> suffer from sub-optimal aliasing detection when deciding whether REP MOVSB
> can actually be carried out the accelerated way. Therefore we want to
> avoid its use in the common case (memset(), copy_page_hot()).

s/memset/memcpy (memset probably uses rep stosb which is not affected IIUC)

>
> Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> Question is whether merely avoiding REP MOVSB (but not REP MOVSQ) is going
> to be good enough.
>

This probably wants to be checked with benchmarks of rep movsb vs rep
movsq+b (current non-ERMS algorithm). If the issue also occurs with rep
movsq, it may be preferable to keep rep movsb even considering this issue.

> --- a/xen/arch/x86/copy_page.S
> +++ b/xen/arch/x86/copy_page.S
> @@ -57,6 +57,6 @@ END(copy_page_cold)
>           .endm
>
>   FUNC(copy_page_hot)
> -        ALTERNATIVE copy_page_movsq, copy_page_movsb, X86_FEATURE_ERMS
> +        ALTERNATIVE copy_page_movsq, copy_page_movsb, 
> X86_FEATURE_XEN_REP_MOVSB
>           RET
>   END(copy_page_hot)
> --- a/xen/arch/x86/cpu/amd.c
> +++ b/xen/arch/x86/cpu/amd.c
> @@ -1386,6 +1386,10 @@ static void cf_check init_amd(struct cpu
>
>       check_syscfg_dram_mod_en();
>
> +     if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS)
> +         && c->family != 0x19 /* Zen3/4 */)
> +             setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB);
> +

May it be fixed through a (future ?) microcode update, especially since
rep movs is microcoded on these archs ?

>       amd_log_freq(c);
>   }
>
> --- a/xen/arch/x86/cpu/intel.c
> +++ b/xen/arch/x86/cpu/intel.c
> @@ -684,6 +684,9 @@ static void cf_check init_intel(struct c
>        */
>       if (c == &boot_cpu_data && c->vfm == INTEL_SKYLAKE_X)
>               setup_clear_cpu_cap(X86_FEATURE_CLWB);
> +
> +     if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS))
> +             setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB);
>   }
>
>   const struct cpu_dev __initconst_cf_clobber intel_cpu_dev = {
> --- a/xen/arch/x86/include/asm/cpufeatures.h
> +++ b/xen/arch/x86/include/asm/cpufeatures.h
> @@ -7,7 +7,7 @@
>   #define FSCAPINTS FEATURESET_NR_ENTRIES
>
>   /* Synthetic words follow the featureset words. */
> -#define X86_NR_SYNTH 1
> +#define X86_NR_SYNTH 2
>   #define X86_SYNTH(x) (FSCAPINTS * 32 + (x))
>
>   /* Synthetic features */
> @@ -43,6 +43,7 @@ XEN_CPUFEATURE(IBPB_ENTRY_PV,     X86_SY
>   XEN_CPUFEATURE(IBPB_ENTRY_HVM,    X86_SYNTH(29)) /* MSR_PRED_CMD used by 
> Xen for HVM */
>   XEN_CPUFEATURE(USE_VMCALL,        X86_SYNTH(30)) /* Use VMCALL instead of 
> VMMCALL */
>   XEN_CPUFEATURE(PDX_COMPRESSION,   X86_SYNTH(31)) /* PDX compression */
> +XEN_CPUFEATURE(XEN_REP_MOVSB,     X86_SYNTH(32)) /* REP MOVSB used for 
> memcpy() and alike. */
>
>   /* Bug words follow the synthetic words. */
>   #define X86_NR_BUG 1
> --- a/xen/arch/x86/memcpy.S
> +++ b/xen/arch/x86/memcpy.S
> @@ -10,7 +10,7 @@ FUNC(memcpy)
>            * precautions were taken).
>            */
>           ALTERNATIVE "and $7, %edx; shr $3, %rcx", \
> -                    STR(rep movsb; RET), X86_FEATURE_ERMS
> +                    STR(rep movsb; RET), X86_FEATURE_XEN_REP_MOVSB
>           rep movsq
>           or      %edx, %ecx
>           jz      1f
>
>

Teddy


--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech





 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.