Both IA64 and PowerPC have specialized (non-memset) versions for this,
so add one for x86 as well, more than doubling the performance of page
clearing on not-too-old processors (those supporting SSE2).
While the patch also adds an SSE version, this is currently orphaned
as I am not certain about the benefit of special casing idle VCPUs in
a few places (during context switching), so that at least in that
context using %xmmN registers would be possible without crashing and/or
corrupting guest state. The benefit of adding such support could be to
reduce scheduling latency when a VCPU is to transition out of idle, but
is busy doing page clearing.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: 2007-06-18/xen/arch/x86/domain.c
===================================================================
--- 2007-06-18.orig/xen/arch/x86/domain.c 2007-06-04 08:35:35.000000000
+0200
+++ 2007-06-18/xen/arch/x86/domain.c 2007-06-18 11:57:46.000000000 +0200
@@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v,
pg = alloc_domheap_page(NULL);
if ( !pg )
return -ENOMEM;
- d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+ d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
+ clear_page(d->arch.mm_arg_xlat_l3);
}
l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
@@ -429,7 +430,8 @@ int arch_domain_create(struct domain *d)
if ( (pg = alloc_domheap_page(NULL)) == NULL )
goto fail;
- d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg));
+ d->arch.mm_perdomain_l2 = page_to_virt(pg);
+ clear_page(d->arch.mm_perdomain_l2);
for ( i = 0; i < (1 << pdpt_order); i++ )
d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
@@ -437,7 +439,8 @@ int arch_domain_create(struct domain *d)
if ( (pg = alloc_domheap_page(NULL)) == NULL )
goto fail;
- d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg));
+ d->arch.mm_perdomain_l3 = page_to_virt(pg);
+ clear_page(d->arch.mm_perdomain_l3);
d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
__PAGE_HYPERVISOR);
Index: 2007-06-18/xen/arch/x86/x86_32/Makefile
===================================================================
--- 2007-06-18.orig/xen/arch/x86/x86_32/Makefile 2006-11-14
13:51:10.000000000 +0100
+++ 2007-06-18/xen/arch/x86/x86_32/Makefile 2007-06-18 11:57:46.000000000
+0200
@@ -1,3 +1,4 @@
+obj-y += clear_page.o
obj-y += domain_page.o
obj-y += entry.o
obj-y += gpr_switch.o
Index: 2007-06-18/xen/arch/x86/x86_32/clear_page.S
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ 2007-06-18/xen/arch/x86/x86_32/clear_page.S 2007-06-18 11:57:46.000000000
+0200
@@ -0,0 +1,36 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+ENTRY(clear_page_xmm) /* Zero PAGE_SIZE bytes at the pointer passed on the stack, using 16-byte non-temporal SSE stores. Clobbers %xmm0, %ecx, %edx. */
+ movl 4(%esp), %edx /* %edx = destination pointer (stack slot just above the return address) */
+ movl $PAGE_SIZE / 64, %ecx /* iteration count: each pass of the loop clears 64 bytes */
+ xorps %xmm0, %xmm0 /* %xmm0 = 0; not saved/restored here, so the caller's %xmm0 is lost */
+
+0:
+ decl %ecx /* sets ZF for the jnz below; the stores and leal in between leave flags untouched */
+ movntps %xmm0, (%edx) /* NOTE(review): movntps needs a 16-byte aligned address - presumably always page-aligned; confirm callers */
+ movntps %xmm0, 16(%edx)
+ movntps %xmm0, 32(%edx)
+ movntps %xmm0, 48(%edx)
+ leal 64(%edx), %edx /* advance by 64 with lea (not add) so ZF from decl is preserved */
+ jnz 0b
+
+ sfence /* order the weakly-ordered non-temporal stores before any later stores */
+ ret
+
+ENTRY(clear_page_sse2) /* Zero PAGE_SIZE bytes at the pointer passed on the stack, using 4-byte non-temporal integer stores (movnti). Clobbers %eax, %ecx, %edx. */
+ movl 4(%esp), %edx /* %edx = destination pointer (stack slot just above the return address) */
+ movl $PAGE_SIZE / 16, %ecx /* iteration count: each pass of the loop clears 16 bytes */
+ xorl %eax, %eax /* %eax = 0, the value stored below */
+
+0:
+ decl %ecx /* sets ZF for the jnz below; the stores and leal in between leave flags untouched */
+ movnti %eax, (%edx)
+ movnti %eax, 4(%edx)
+ movnti %eax, 8(%edx)
+ movnti %eax, 12(%edx)
+ leal 16(%edx), %edx /* advance by 16 with lea (not add) so ZF from decl is preserved */
+ jnz 0b
+
+ sfence /* order the weakly-ordered non-temporal stores before any later stores */
+ ret
Index: 2007-06-18/xen/arch/x86/x86_64/Makefile
===================================================================
--- 2007-06-18.orig/xen/arch/x86/x86_64/Makefile 2007-02-12
14:00:54.000000000 +0100
+++ 2007-06-18/xen/arch/x86/x86_64/Makefile 2007-06-18 11:57:46.000000000
+0200
@@ -1,12 +1,13 @@
subdir-y += compat
+obj-y += clear_page.o
obj-y += entry.o
-obj-y += compat_kexec.o
obj-y += gpr_switch.o
obj-y += mm.o
obj-y += traps.o
obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_COMPAT) += compat_kexec.o
obj-$(CONFIG_COMPAT) += domain.o
obj-$(CONFIG_COMPAT) += physdev.o
obj-$(CONFIG_COMPAT) += platform_hypercall.o
Index: 2007-06-18/xen/arch/x86/x86_64/clear_page.S
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ 2007-06-18/xen/arch/x86/x86_64/clear_page.S 2007-06-18 11:57:46.000000000
+0200
@@ -0,0 +1,34 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+ENTRY(clear_page_xmm) /* Zero PAGE_SIZE bytes at the pointer in %rdi, using 16-byte non-temporal SSE stores. Clobbers %xmm0, %ecx, %rdi. */
+ movl $PAGE_SIZE / 64, %ecx /* iteration count: each pass of the loop clears 64 bytes */
+ xorps %xmm0, %xmm0 /* %xmm0 = 0; not saved/restored here, so the caller's %xmm0 is lost */
+
+0:
+ decl %ecx /* sets ZF for the jnz below; the stores and leaq in between leave flags untouched */
+ movntps %xmm0, (%rdi) /* NOTE(review): movntps needs a 16-byte aligned address - presumably always page-aligned; confirm callers */
+ movntps %xmm0, 16(%rdi)
+ movntps %xmm0, 32(%rdi)
+ movntps %xmm0, 48(%rdi)
+ leaq 64(%rdi), %rdi /* advance by 64 with lea (not add) so ZF from decl is preserved */
+ jnz 0b
+
+ sfence /* order the weakly-ordered non-temporal stores before any later stores */
+ ret
+
+ENTRY(clear_page_sse2) /* Zero PAGE_SIZE bytes at the pointer in %rdi, using 8-byte non-temporal integer stores (movnti). Clobbers %rax, %ecx, %rdi. */
+ movl $PAGE_SIZE / 32, %ecx /* iteration count: each pass of the loop clears 32 bytes */
+ xorl %eax, %eax /* a 32-bit write zero-extends, so the whole of %rax is 0 */
+
+0:
+ decl %ecx /* sets ZF for the jnz below; the stores and leaq in between leave flags untouched */
+ movnti %rax, (%rdi)
+ movnti %rax, 8(%rdi)
+ movnti %rax, 16(%rdi)
+ movnti %rax, 24(%rdi)
+ leaq 32(%rdi), %rdi /* advance by 32 with lea (not add) so ZF from decl is preserved */
+ jnz 0b
+
+ sfence /* order the weakly-ordered non-temporal stores before any later stores */
+ ret
Index: 2007-06-18/xen/arch/x86/x86_64/mm.c
===================================================================
--- 2007-06-18.orig/xen/arch/x86/x86_64/mm.c 2007-06-04 08:35:35.000000000
+0200
+++ 2007-06-18/xen/arch/x86/x86_64/mm.c 2007-06-18 11:57:46.000000000 +0200
@@ -106,7 +106,8 @@ void __init paging_init(void)
/* Create user-accessible L2 directory to map the MPT for guests. */
if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
goto nomem;
- l3_ro_mpt = clear_page(page_to_virt(l2_pg));
+ l3_ro_mpt = page_to_virt(l2_pg);
+ clear_page(l3_ro_mpt);
l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
@@ -132,7 +133,8 @@ void __init paging_init(void)
if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
goto nomem;
va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
- l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+ l2_ro_mpt = page_to_virt(l2_pg);
+ clear_page(l2_ro_mpt);
l3e_write(&l3_ro_mpt[l3_table_offset(va)],
l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
l2_ro_mpt += l2_table_offset(va);
@@ -152,7 +154,8 @@ void __init paging_init(void)
l3_ro_mpt =
l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
goto nomem;
- compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+ compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
+ clear_page(l2_ro_mpt);
l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
Index: 2007-06-18/xen/include/asm-x86/page.h
===================================================================
--- 2007-06-18.orig/xen/include/asm-x86/page.h 2007-06-04 08:35:36.000000000
+0200
+++ 2007-06-18/xen/include/asm-x86/page.h 2007-06-18 11:57:46.000000000
+0200
@@ -214,9 +214,12 @@ typedef struct { u64 pfn; } pagetable_t;
#define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
#define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
#define pagetable_null() pagetable_from_pfn(0)
-#endif
-#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
+extern void clear_page_xmm(void *); /* SSE (movntps) page clear; declared but not used by clear_page() below */
+extern void clear_page_sse2(void *); /* SSE2 (movnti) page clear */
+#define clear_page(_p) (cpu_has_xmm2 ? \
+ clear_page_sse2((void *)(_p)) : \
+ (void)memset((void *)(_p), 0, PAGE_SIZE)) /* zero one page; both arms are void (ISO C requires matching arm types), so clear_page() yields no value */
#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
#define mfn_valid(mfn) ((mfn) < max_page)
@@ -244,6 +247,7 @@ typedef struct { u64 pfn; } pagetable_t;
/* Convert between frame number and address formats. */
#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
#define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+#endif
/* High table entries are reserved by the hypervisor. */
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|