WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 3/3] Add shadow VRAM

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 3/3] Add shadow VRAM
From: "Donald D. Dugger" <donald.d.dugger@xxxxxxxxx>
Date: Wed, 15 Mar 2006 13:57:00 -0800
Delivery-date: Wed, 15 Mar 2006 21:59:34 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.4.1i
Add a shadow VRAM to track changes to the real VRAM.  Since the guest
OS was given write access to the VRAM, the device model has handled
VRAM changes by redrawing the entire screen on every output loop.
This causes significant overhead (a CPU-bound loop in a guest slows
down by about 35%) and significant mouse latency (VNC uses the same
data path for mouse events and video updates).  With the shadow VRAM,
only modified pages need to be updated; comparing the shadow VRAM to
the real VRAM adds only ~4% overhead while eliminating the mouse
latency.

Signed-off-by: Don Dugger <donald.d.dugger@xxxxxxxxx>

-- 
Don Dugger
"Censeo Toto nos in Kansa esse decisse." - D. Gale
Donald.D.Dugger@xxxxxxxxx
Ph: (303)440-1368


diff -r 7d8efd4f1ac7 tools/ioemu/configure
--- a/tools/ioemu/configure     Tue Mar 14 15:18:35 2006 +0100
+++ b/tools/ioemu/configure     Tue Mar 14 15:47:32 2006 -0700
@@ -160,6 +160,8 @@ for opt do
   ;;
   --fmod-inc=*) fmod_inc=${opt#--fmod-inc=}
   ;;
+  --disable-sse2) have_sse2="no"
+  ;;
   --disable-vnc) vnc="no"
   ;;
   --enable-mingw32) mingw32="yes" ; cross_prefix="i386-mingw32-"
@@ -225,6 +227,44 @@ if $cc -fno-reorder-blocks -fno-optimize
    have_gcc3_options="yes"
 fi
 
+# SSE2 - check if supported
+cat > $TMPC <<EOF
+#include <signal.h>
+#include <emmintrin.h>
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+       unsigned int eax, edx;
+
+       __asm__("cpuid"
+               : "=a" (eax), "=d" (edx)
+               : "0" (op)
+               : "bx", "cx");
+       return edx;
+}       
+
+void intr(int sig)
+{
+       exit(1);
+}
+
+int main(void) {
+       if ((cpuid_edx(1) & 0x4000000) == 0)
+               exit(1);
+       signal(SIGILL, intr);
+       __asm__("xorps %xmm0,%xmm0\n");
+       exit(0);
+}
+EOF
+
+if test -z "$have_sse2" ; then
+   have_sse2="no"
+   if $cc -msse2 -o $TMPE $TMPC 2> /dev/null ; then
+      if $TMPE ; then
+         have_sse2="yes"
+      fi
+   fi
+fi
+
 ##########################################
 # VNC probe
 
@@ -234,6 +274,29 @@ if libvncserver-config --version >& /dev
     vnc=yes
 else
     vnc=no
+fi
+
+fi
+
+if test "$vnc" = "yes" ; then
+
+# check for eager event handling
+cat > $TMPC <<EOF
+#include "rfb/rfb.h"
+int main(void) {
+       rfbScreenInfoPtr screen;
+
+       screen->handleEventsEagerly = 1;
+}
+EOF
+
+if $cc `libvncserver-config --cflags` -o $TMPO $TMPC 2> /dev/null ; then
+   have_eager_events="yes"
+else
+   echo "!!"
+   echo "!! Slow VNC mouse, LibVNCServer doesn't support eager events"
+   echo "!!"
+   have_eager_events="no"
 fi
 
 fi
@@ -315,6 +378,7 @@ echo "  --enable-fmod            enable 
 echo "  --enable-fmod            enable FMOD audio output driver"
 echo "  --fmod-lib               path to FMOD library"
 echo "  --fmod-inc               path to FMOD includes"
+echo "  --disable-sse2           disable use of SSE2 instructions"
 echo ""
 echo "NOTE: The object files are build at the place where configure is launched"
 exit 1
@@ -361,6 +425,7 @@ echo "VNC support       $vnc"
 echo "VNC support       $vnc"
 echo "SDL support       $sdl"
 echo "SDL static link   $sdl_static"
+echo "SSE2 support      $have_sse2"
 echo "mingw32 support   $mingw32"
 echo "Adlib support     $adlib"
 echo -n "FMOD support      $fmod"
@@ -392,6 +457,9 @@ echo "configdir=$configdir" >> $config_m
 echo "configdir=$configdir" >> $config_mak
 echo "LIBDIR=$libdir" >> $config_mak
 echo "#define CONFIG_QEMU_SHAREDIR \"$datadir\"" >> $config_h
+if test "$have_sse2" = "yes" ; then
+  echo "LOCAL_CFLAGS=-msse2" >>$config_mak
+fi
 #echo "MAKE=$make" >> $config_mak
 #echo "CC=$cc" >> $config_mak
 #if test "$have_gcc3_options" = "yes" ; then
@@ -458,6 +526,10 @@ echo "SRC_PATH=$source_path" >> $config_
 echo "SRC_PATH=$source_path" >> $config_mak
 echo "TARGET_DIRS=$target_list" >> $config_mak
 
+if test "$have_sse2" = "yes" ; then
+  echo "#define USE_SSE2 1" >>$config_h
+fi
+
 # XXX: suppress that
 if [ "$bsd" = "yes" ] ; then
   echo "#define O_LARGEFILE 0" >> $config_h
@@ -472,6 +544,9 @@ if test "$vnc" = "yes"; then
     vnc_cflags="/usr/include"
   fi
   echo "VNC_CFLAGS=$vnc_cflags" >> $config_mak
+  if test "$have_eager_events" = "yes" ; then
+    echo "#define VNC_EAGER_EVENTS 1" >> $config_h
+  fi
 fi
 
 if test "$sdl" = "yes"; then
diff -r 7d8efd4f1ac7 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c      Tue Mar 14 15:18:35 2006 +0100
+++ b/tools/ioemu/hw/vga.c      Tue Mar 14 15:47:32 2006 -0700
@@ -1340,6 +1340,100 @@ void vga_invalidate_scanlines(VGAState *
     }
 }
 
+extern inline int cmp_vram(VGAState *s, int offset, int n)
+{
+       long *vp, *sp;
+
+       if (s->vram_shadow == NULL)
+               return 1;
+       vp = (long *)(s->vram_ptr + offset);
+       sp = (long *)(s->vram_shadow + offset);
+       while ((n -= sizeof(*vp)) >= 0)
+               if (*vp++ != *sp++) {
+                       memcpy(sp - 1, vp - 1, n + sizeof(*vp));
+                       return 1;
+               }
+       return 0;
+}
+
+#ifdef USE_SSE2
+#include <signal.h>
+#include <setjmp.h>
+#include <emmintrin.h>
+
+int sse2_ok = 1;
+
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+       unsigned int eax, edx;
+
+       __asm__("cpuid"
+               : "=a" (eax), "=d" (edx)
+               : "0" (op)
+               : "bx", "cx");
+       return edx;
+}
+
+jmp_buf sse_jbuf;
+
+void intr(int sig)
+{
+
+       sse2_ok = 0;
+       longjmp(sse_jbuf, 1);
+       return;
+}
+
+void check_sse2()
+{
+
+       if ((cpuid_edx(1) & 0x4000000) == 0) {
+               sse2_ok = 0;
+               return;
+       }
+       signal(SIGILL, intr);
+       if (setjmp(sse_jbuf) == 0)
+               __asm__("xorps %xmm0,%xmm0\n");
+       return;
+}
+
+int vram_dirty(VGAState *s, int offset, int n)
+{
+       __m128i *sp, *vp;
+
+       if (s->vram_shadow == NULL)
+               return 1;
+       if (sse2_ok == 0)
+               return cmp_vram(s, offset, n);
+       vp = (__m128i *)(s->vram_ptr + offset);
+       sp = (__m128i *)(s->vram_shadow + offset);
+       while ((n -= sizeof(*vp)) >= 0) {
+               if (_mm_movemask_epi8(_mm_cmpeq_epi8(*sp, *vp)) != 0xffff) {
+                       while (n >= 0) {
+                               _mm_store_si128(sp++, _mm_load_si128(vp++));
+                               n -= sizeof(*vp);
+                       }
+                       return 1;
+               }
+               sp++;
+               vp++;
+       }
+       return 0;
+}
+#else  // USE_SSE2
+int vram_dirty(VGAState *s, int offset, int n)
+{
+
+       return cmp_vram(s, offset, n);
+}
+
+void check_sse2()
+{
+
+       return;
+}
+#endif // USE_SSE2
+
 /* 
  * graphic modes
  */
@@ -1434,6 +1528,9 @@ static void vga_draw_graphic(VGAState *s
     printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n",
            width, height, v, line_offset, s->cr[9], s->cr[0x17], s->line_compare, s->sr[0x01]);
 #endif
+    for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE)
+       if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+           cpu_physical_memory_set_dirty(s->vram_offset + y);
     addr1 = (s->start_addr * 4);
     bwidth = width * 4;
     y_start = -1;
@@ -1536,8 +1633,17 @@ static void vga_draw_blank(VGAState *s, 
 
 void vga_update_display(void)
 {
+    static int loop;
     VGAState *s = vga_state;
     int full_update, graphic_mode;
+
+    /*
+     *  Only update the display every other time.  The responsiveness is
+     *    acceptable and it cuts down on the overhead of the VRAM compare
+     *    in `vram_dirty'
+     */
+    if (loop++ & 1)
+       return;
 
     if (s->ds->depth == 0) {
         /* nothing to do */
@@ -1569,7 +1675,6 @@ void vga_update_display(void)
             full_update = 1;
         }
 
-        full_update = 1;
         switch(graphic_mode) {
         case GMODE_TEXT:
             vga_draw_text(s, full_update);
@@ -1874,7 +1979,10 @@ void vga_common_init(VGAState *s, Displa
 #else
     s->vram_ptr = qemu_malloc(vga_ram_size);
 #endif
-
+    check_sse2();
+    if ((s->vram_shadow = qemu_malloc(vga_ram_size+TARGET_PAGE_SIZE+1)) == NULL)
+       fprintf(stderr, "Cannot allocate %d bytes for VRAM shadow, mouse will be slow\n", vga_ram_size);
+    s->vram_shadow = (uint8_t *)((long)(s->vram_shadow + TARGET_PAGE_SIZE - 1) & ~(TARGET_PAGE_SIZE - 1));
     s->vram_offset = vga_ram_offset;
     s->vram_size = vga_ram_size;
     s->ds = ds;
diff -r 7d8efd4f1ac7 tools/ioemu/hw/vga_int.h
--- a/tools/ioemu/hw/vga_int.h  Tue Mar 14 15:18:35 2006 +0100
+++ b/tools/ioemu/hw/vga_int.h  Tue Mar 14 15:47:32 2006 -0700
@@ -76,6 +76,7 @@
 
 #define VGA_STATE_COMMON                                                \
     uint8_t *vram_ptr;                                                  \
+    uint8_t *vram_shadow;                                              \
     unsigned long vram_offset;                                          \
     unsigned int vram_size;                                             \
     uint32_t latch;                                                     \
diff -r 7d8efd4f1ac7 tools/ioemu/target-i386-dm/Makefile
--- a/tools/ioemu/target-i386-dm/Makefile       Tue Mar 14 15:18:35 2006 +0100
+++ b/tools/ioemu/target-i386-dm/Makefile       Tue Mar 14 15:47:32 2006 -0700
@@ -13,7 +13,7 @@ VPATH+=:$(SRC_PATH)/linux-user
 VPATH+=:$(SRC_PATH)/linux-user
 DEFINES+=-I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ARCH)
 endif
-CFLAGS+=-g -fno-strict-aliasing
+CFLAGS+=-g -fno-strict-aliasing $(LOCAL_CFLAGS)
 LDFLAGS=-g
 LIBS=
 HELPER_CFLAGS=$(CFLAGS)

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>