# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1254897939 -3600
# Node ID 9130b2b5dc99173f70f7cc45eeb7cc9d0f30411c
# Parent 0faa0b98256ec4ed7511567c3ededf5fa363cf99
Optimize memcpy for the x86 architecture. If the source buffer does not
start on a 64-bit boundary, copy a few bytes at the beginning, up to the
next 64-bit boundary, and then do an aligned copy of the remaining data.
This can reduce the copy cost by up to 50%.

Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx>
---
xen/include/asm-x86/string.h | 18 +++++++++++++++++-
1 files changed, 17 insertions(+), 1 deletion(-)
diff -r 0faa0b98256e -r 9130b2b5dc99 xen/include/asm-x86/string.h
--- a/xen/include/asm-x86/string.h Wed Oct 07 07:45:14 2009 +0100
+++ b/xen/include/asm-x86/string.h Wed Oct 07 07:45:39 2009 +0100
@@ -96,13 +96,29 @@ static always_inline void * __constant_m
}
#define __HAVE_ARCH_MEMCPY
+/* align source to a 64-bit boundary */
+static always_inline
+void *__var_memcpy(void *t, const void *f, size_t n)
+{
+    int off = (unsigned long)f & 0x7;
+    /* just do the alignment fixup if needed and if the size is worth it */
+    if ( (n > 32) && off ) {
+        size_t n1 = 8 - off;
+        __variable_memcpy(t, f, n1);
+        __variable_memcpy(t + n1, f + n1, n - n1);
+        return t;
+    } else {
+        return (__variable_memcpy(t, f, n));
+    }
+}
+
#define memcpy(t,f,n) (__memcpy((t),(f),(n)))
static always_inline
void *__memcpy(void *t, const void *f, size_t n)
{
    return (__builtin_constant_p(n) ?
            __constant_memcpy((t),(f),(n)) :
-           __variable_memcpy((t),(f),(n)));
+           __var_memcpy((t),(f),(n)));
}
/* Some versions of gcc don't have this builtin. It's non-critical anyway. */
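
For readers outside the Xen tree, below is a minimal, self-contained sketch of
the same alignment-split idea in plain C. The names aligned_split_copy and
var_copy are hypothetical stand-ins for illustration (they are not the Xen
helpers), and var_copy simply delegates to libc memcpy so the example compiles
and runs on its own.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for Xen's __variable_memcpy(): a plain copy with no
 * alignment handling, delegated to libc memcpy here to keep the sketch
 * runnable. */
static void *var_copy(void *t, const void *f, size_t n)
{
    return memcpy(t, f, n);
}

/* Alignment-split copy: when the source is not 64-bit aligned and the copy
 * is large enough to be worth the extra call, copy the few bytes up to the
 * next 64-bit boundary first, then copy the rest from an aligned source
 * address. */
static void *aligned_split_copy(void *t, const void *f, size_t n)
{
    size_t off = (uintptr_t)f & 0x7;   /* bytes past the last 64-bit boundary */

    if ( (n > 32) && off )
    {
        size_t head = 8 - off;         /* bytes needed to reach alignment */
        var_copy(t, f, head);
        var_copy((char *)t + head, (const char *)f + head, n - head);
        return t;
    }
    return var_copy(t, f, n);
}

int main(void)
{
    char src[64], dst[64] = { 0 };
    int i;

    for ( i = 0; i < 64; i++ )
        src[i] = (char)i;

    /* Source offset by 3 bytes, so it is usually not 64-bit aligned. */
    aligned_split_copy(dst, src + 3, 40);
    printf("first copied byte: %d\n", dst[0]);   /* expected: 3 */
    return 0;
}

The n > 32 threshold mirrors the patch: for small copies, the extra call to
handle the unaligned head would cost more than the misaligned accesses it
avoids.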