# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1226491455 0
# Node ID 8de4b4e9a435cea9b8e85863fcb832c213281076
# Parent 8e18dd41c6c7bb0980b29393b275c564cfb96437
x86: add SSE-based copy_page()
On top of the highmem assistance hypercalls added earlier, this provides
a performance improvement of another 12% (measured on Xeon E5345) for
the page copying case.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
xen/arch/x86/Makefile | 1
xen/arch/x86/copy_page.S | 66 ++++++++++++++++++++++++++++++++++++++++++++
xen/arch/x86/domain.c | 3 +-
xen/arch/x86/domain_build.c | 5 ++-
xen/include/asm-x86/page.h | 5 ++-
5 files changed, 76 insertions(+), 4 deletions(-)
diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Wed Nov 12 12:01:35 2008 +0000
+++ b/xen/arch/x86/Makefile Wed Nov 12 12:04:15 2008 +0000
@@ -11,6 +11,7 @@ obj-y += apic.o
obj-y += apic.o
obj-y += bitops.o
obj-y += clear_page.o
+obj-y += copy_page.o
obj-y += compat.o
obj-y += delay.o
obj-y += dmi_scan.o
diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/copy_page.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/copy_page.S Wed Nov 12 12:04:15 2008 +0000
@@ -0,0 +1,66 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+#ifdef __i386__
+#define src_reg %esi
+#define dst_reg %edi
+#define WORD_SIZE 4
+#define tmp1_reg %eax
+#define tmp2_reg %edx
+#define tmp3_reg %ebx
+#define tmp4_reg %ebp
+#else
+#define src_reg %rsi
+#define dst_reg %rdi
+#define WORD_SIZE 8
+#define tmp1_reg %r8
+#define tmp2_reg %r9
+#define tmp3_reg %r10
+#define tmp4_reg %r11
+#endif
+
+ENTRY(copy_page_sse2) /* Copy PAGE_SIZE bytes from src_reg to dst_reg, 4 words per step, using non-temporal (movnti) stores. */
+#ifdef __i386__
+ push %ebx /* save the registers used below as tmp3_reg, tmp4_reg, src_reg and dst_reg */
+ push %ebp
+ push %esi
+ push %edi
+ mov 6*4(%esp), src_reg /* second stack argument: sits above the 4 saved registers, the return address and the first argument */
+ mov 5*4(%esp), dst_reg /* first stack argument */
+#endif
+ mov $PAGE_SIZE/(4*WORD_SIZE)-3, %ecx /* loop counter: number of 4-word chunks in a page, minus 3 (loop exits when it reaches -2) */
+
+ prefetchnta 2*4*WORD_SIZE(src_reg) /* prefetch the chunk two ahead of the current source position */
+ mov (src_reg), tmp1_reg /* preload the first chunk into tmp1..tmp4 */
+ mov WORD_SIZE(src_reg), tmp2_reg
+ mov 2*WORD_SIZE(src_reg), tmp3_reg
+ mov 3*WORD_SIZE(src_reg), tmp4_reg
+
+0: prefetchnta 3*4*WORD_SIZE(src_reg) /* prefetch the chunk three ahead of the current source position */
+1: add $4*WORD_SIZE, src_reg /* advance source to the next chunk */
+ movnti tmp1_reg, (dst_reg) /* store a word of the previously loaded chunk ... */
+ mov (src_reg), tmp1_reg /* ... and reload the register from the next chunk */
+ dec %ecx /* sets the flags tested by jg/jpe below; the mov/movnti/lea in between do not modify flags */
+ movnti tmp2_reg, WORD_SIZE(dst_reg)
+ mov WORD_SIZE(src_reg), tmp2_reg
+ movnti tmp3_reg, 2*WORD_SIZE(dst_reg)
+ mov 2*WORD_SIZE(src_reg), tmp3_reg
+ movnti tmp4_reg, 3*WORD_SIZE(dst_reg)
+ lea 4*WORD_SIZE(dst_reg), dst_reg /* advance destination; lea (unlike add) leaves the flags from dec intact */
+ mov 3*WORD_SIZE(src_reg), tmp4_reg
+ jg 0b /* counter still > 0: next iteration, with prefetch */
+ jpe 1b /* counter 0 or -1 (low byte 0x00/0xff, even parity): iterate without the prefetch, presumably so it never reaches past the source page; -2 (0xfe, odd parity) falls through */
+
+ movnti tmp1_reg, (dst_reg) /* store the final chunk, loaded by the last loop iteration */
+ movnti tmp2_reg, WORD_SIZE(dst_reg)
+ movnti tmp3_reg, 2*WORD_SIZE(dst_reg)
+ movnti tmp4_reg, 3*WORD_SIZE(dst_reg)
+
+#ifdef __i386__
+ pop %edi /* restore saved registers in reverse order of the pushes */
+ pop %esi
+ pop %ebp
+ pop %ebx
+#endif
+ sfence /* fence the non-temporal stores issued above before returning */
+ ret
diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Wed Nov 12 12:01:35 2008 +0000
+++ b/xen/arch/x86/domain.c Wed Nov 12 12:04:15 2008 +0000
@@ -184,7 +184,8 @@ static int setup_compat_l4(struct vcpu *
/* This page needs to look like a pagetable so that it can be shadowed */
pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
- l4tab = copy_page(page_to_virt(pg), idle_pg_table);
+ l4tab = page_to_virt(pg);
+ copy_page(l4tab, idle_pg_table);
l4tab[0] = l4e_empty();
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_page(pg, __PAGE_HYPERVISOR);
diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Wed Nov 12 12:01:35 2008 +0000
+++ b/xen/arch/x86/domain_build.c Wed Nov 12 12:04:15 2008 +0000
@@ -455,8 +455,9 @@ int __init construct_dom0(
/* WARNING: The new domain must have its 'processor' field filled in! */
l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
- memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < L3_PAGETABLE_ENTRIES; i++) {
+ copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
+ idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT);
l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Wed Nov 12 12:01:35 2008 +0000
+++ b/xen/include/asm-x86/page.h Wed Nov 12 12:04:15 2008 +0000
@@ -215,7 +215,10 @@ void clear_page_sse2(void *);
#define clear_page(_p) (cpu_has_xmm2 ? \
clear_page_sse2((void *)(_p)) : \
(void)memset((void *)(_p), 0, PAGE_SIZE))
-#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+void copy_page_sse2(void *, const void *); /* (to, from); used by copy_page() when cpu_has_xmm2, else memcpy(). Both arms are void, so copy_page() yields no value. */
+#define copy_page(_t,_f) (cpu_has_xmm2 ? \
+ copy_page_sse2(_t, _f) : \
+ (void)memcpy(_t, _f, PAGE_SIZE))
#define mfn_valid(mfn) ((mfn) < max_page)
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|