WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] x86: introduce specialized clear_page()

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] x86: introduce specialized clear_page()
From: "Jan Beulich" <jbeulich@xxxxxxxxxx>
Date: Tue, 19 Jun 2007 11:16:37 +0100
Delivery-date: Tue, 19 Jun 2007 03:14:03 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Both IA64 and PowerPC have specialized (non-memset) versions for this,
so add one for x86 as well, more than doubling the performance of page
clearing on not too old processors (those supporting SSE2).

While the patch also adds an SSE version, this is currently orphaned
as I am not certain about the benefit of special casing idle VCPUs in
a few places (during context switching), so that at least in that
context using %xmmN registers would be possible without crashing and/or
corrupting guest state. The benefit of adding such support could be to
reduce scheduling latency when a VCPU is to transition out of idle, but
is busy doing page clearing.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2007-06-18/xen/arch/x86/domain.c
===================================================================
--- 2007-06-18.orig/xen/arch/x86/domain.c       2007-06-04 08:35:35.000000000 +0200
+++ 2007-06-18/xen/arch/x86/domain.c    2007-06-18 11:57:46.000000000 +0200
@@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v, 
         pg = alloc_domheap_page(NULL);
         if ( !pg )
             return -ENOMEM;
-        d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+        d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
+        clear_page(d->arch.mm_arg_xlat_l3);
     }
 
     l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
@@ -429,7 +430,8 @@ int arch_domain_create(struct domain *d)
 
     if ( (pg = alloc_domheap_page(NULL)) == NULL )
         goto fail;
-    d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg));
+    d->arch.mm_perdomain_l2 = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l2);
     for ( i = 0; i < (1 << pdpt_order); i++ )
         d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
             l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
@@ -437,7 +439,8 @@ int arch_domain_create(struct domain *d)
 
     if ( (pg = alloc_domheap_page(NULL)) == NULL )
         goto fail;
-    d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg));
+    d->arch.mm_perdomain_l3 = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l3);
     d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
         l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
                             __PAGE_HYPERVISOR);
Index: 2007-06-18/xen/arch/x86/x86_32/Makefile
===================================================================
--- 2007-06-18.orig/xen/arch/x86/x86_32/Makefile        2006-11-14 13:51:10.000000000 +0100
+++ 2007-06-18/xen/arch/x86/x86_32/Makefile     2007-06-18 11:57:46.000000000 +0200
@@ -1,3 +1,4 @@
+obj-y += clear_page.o
 obj-y += domain_page.o
 obj-y += entry.o
 obj-y += gpr_switch.o
Index: 2007-06-18/xen/arch/x86/x86_32/clear_page.S
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ 2007-06-18/xen/arch/x86/x86_32/clear_page.S 2007-06-18 11:57:46.000000000 +0200
@@ -0,0 +1,36 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+ENTRY(clear_page_xmm)
+       movl    4(%esp), %edx
+       movl    $PAGE_SIZE / 64, %ecx
+       xorps   %xmm0, %xmm0
+
+0:
+       decl    %ecx
+       movntps %xmm0, (%edx)
+       movntps %xmm0, 16(%edx)
+       movntps %xmm0, 32(%edx)
+       movntps %xmm0, 48(%edx)
+       leal    64(%edx), %edx
+       jnz     0b
+
+       sfence
+       ret
+
+ENTRY(clear_page_sse2)
+       movl    4(%esp), %edx
+       movl    $PAGE_SIZE / 16, %ecx
+       xorl    %eax, %eax
+
+0:
+       decl    %ecx
+       movnti  %eax, (%edx)
+       movnti  %eax, 4(%edx)
+       movnti  %eax, 8(%edx)
+       movnti  %eax, 12(%edx)
+       leal    16(%edx), %edx
+       jnz     0b
+
+       sfence
+       ret
Index: 2007-06-18/xen/arch/x86/x86_64/Makefile
===================================================================
--- 2007-06-18.orig/xen/arch/x86/x86_64/Makefile        2007-02-12 14:00:54.000000000 +0100
+++ 2007-06-18/xen/arch/x86/x86_64/Makefile     2007-06-18 11:57:46.000000000 +0200
@@ -1,12 +1,13 @@
 subdir-y += compat
 
+obj-y += clear_page.o
 obj-y += entry.o
-obj-y += compat_kexec.o
 obj-y += gpr_switch.o
 obj-y += mm.o
 obj-y += traps.o
 
 obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_COMPAT) += compat_kexec.o
 obj-$(CONFIG_COMPAT) += domain.o
 obj-$(CONFIG_COMPAT) += physdev.o
 obj-$(CONFIG_COMPAT) += platform_hypercall.o
Index: 2007-06-18/xen/arch/x86/x86_64/clear_page.S
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ 2007-06-18/xen/arch/x86/x86_64/clear_page.S 2007-06-18 11:57:46.000000000 +0200
@@ -0,0 +1,34 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+ENTRY(clear_page_xmm)
+       movl    $PAGE_SIZE / 64, %ecx
+       xorps   %xmm0, %xmm0
+
+0:
+       decl    %ecx
+       movntps %xmm0, (%rdi)
+       movntps %xmm0, 16(%rdi)
+       movntps %xmm0, 32(%rdi)
+       movntps %xmm0, 48(%rdi)
+       leaq    64(%rdi), %rdi
+       jnz     0b
+
+       sfence
+       ret
+
+ENTRY(clear_page_sse2)
+       movl    $PAGE_SIZE / 32, %ecx
+       xorl    %eax, %eax
+
+0:
+       decl    %ecx
+       movnti  %rax, (%rdi)
+       movnti  %rax, 8(%rdi)
+       movnti  %rax, 16(%rdi)
+       movnti  %rax, 24(%rdi)
+       leaq    32(%rdi), %rdi
+       jnz     0b
+
+       sfence
+       ret
Index: 2007-06-18/xen/arch/x86/x86_64/mm.c
===================================================================
--- 2007-06-18.orig/xen/arch/x86/x86_64/mm.c    2007-06-04 08:35:35.000000000 +0200
+++ 2007-06-18/xen/arch/x86/x86_64/mm.c 2007-06-18 11:57:46.000000000 +0200
@@ -106,7 +106,8 @@ void __init paging_init(void)
     /* Create user-accessible L2 directory to map the MPT for guests. */
     if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
         goto nomem;
-    l3_ro_mpt = clear_page(page_to_virt(l2_pg));
+    l3_ro_mpt = page_to_virt(l2_pg);
+    clear_page(l3_ro_mpt);
     l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
               l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
 
@@ -132,7 +133,8 @@ void __init paging_init(void)
             if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
                 goto nomem;
             va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
-            l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+            l2_ro_mpt = page_to_virt(l2_pg);
+            clear_page(l2_ro_mpt);
             l3e_write(&l3_ro_mpt[l3_table_offset(va)],
                       l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
             l2_ro_mpt += l2_table_offset(va);
@@ -152,7 +154,8 @@ void __init paging_init(void)
         l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
         if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
             goto nomem;
-        compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+        compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
+        clear_page(l2_ro_mpt);
         l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
                   l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
         l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
Index: 2007-06-18/xen/include/asm-x86/page.h
===================================================================
--- 2007-06-18.orig/xen/include/asm-x86/page.h  2007-06-04 08:35:36.000000000 +0200
+++ 2007-06-18/xen/include/asm-x86/page.h       2007-06-18 11:57:46.000000000 +0200
@@ -214,9 +214,12 @@ typedef struct { u64 pfn; } pagetable_t;
 #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
 #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
 #define pagetable_null()        pagetable_from_pfn(0)
-#endif
 
-#define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
+extern void clear_page_xmm(void *);
+extern void clear_page_sse2(void *);
+#define clear_page(_p)      (cpu_has_xmm2 ? \
+                             clear_page_sse2((void *)(_p)) : \
+                             memset((void *)(_p), 0, PAGE_SIZE))
 #define copy_page(_t,_f)    memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
 
 #define mfn_valid(mfn)      ((mfn) < max_page)
@@ -244,6 +247,7 @@ typedef struct { u64 pfn; } pagetable_t;
 /* Convert between frame number and address formats.  */
 #define pfn_to_paddr(pfn)   ((paddr_t)(pfn) << PAGE_SHIFT)
 #define paddr_to_pfn(pa)    ((unsigned long)((pa) >> PAGE_SHIFT))
+#endif
 
 /* High table entries are reserved by the hypervisor. */
 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>