On Mon, 2007-10-29 at 19:17 +0000, Keir Fraser wrote:
>
> All that's changed for ia64 is the definition of the buffered ioreq
> structure, which has become more densely packed. All the rest of the
> acceleration is (currently) x86-specific. So this shouldn't be too hard to
> track down...
Yes, you're right, but it's easy to overlook, and I'm not sure how it
works on x86. I copied the x86 code for filling in the buffered ioreq,
but failed to notice that it attempts to store 4 bytes of data into a
2-byte field... The comment for the size entry in buf_ioreq could be
interpreted to mean that only 1-, 2-, and 8-byte accesses are expected,
but I definitely see 4-byte accesses on occasion. I'd guess x86 has the
same bug and it's simply not exposed because of the 16-bit code that's
probably being used to initialize VGA. I also question the 8-byte
support, which is why I skipped it in the patch below (a small
standalone snippet just before the diff illustrates the truncation).
Wouldn't an 8-byte MMIO access that isn't a timeoffset be possible?
Keir, please apply this to the staging tree. Thanks,
Alex
Signed-off-by: Alex Williamson <alex.williamson@xxxxxx>
--
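For anyone who wants to see the truncation outside of Xen, here is a
tiny standalone sketch. The struct below is only a rough stand-in for a
densely packed buffered ioreq slot; the field names and widths are made
up for illustration and are not copied from the real public ioreq
header.

/* demo_buf_ioreq is a made-up stand-in for a densely packed buffered
 * ioreq slot whose data field is only 16 bits wide. */
#include <stdint.h>
#include <stdio.h>

struct demo_buf_ioreq {
    uint8_t  type;
    uint8_t  dir:1;
    uint8_t  size:2;   /* encoded size: 0 => 1 byte, 1 => 2 bytes */
    uint32_t addr;
    uint16_t data;     /* only 16 bits of payload fit in one slot */
};

int main(void)
{
    uint32_t mmio_data = 0xdeadbeef;   /* a 4-byte guest write */
    struct demo_buf_ioreq bp = { 0 };

    bp.size = 1;                       /* claims "2 bytes" */
    bp.data = (uint16_t)mmio_data;     /* upper 16 bits silently lost */

    printf("wanted 0x%x, stored 0x%x\n", (unsigned)mmio_data,
           (unsigned)bp.data);
    return 0;
}

It prints "wanted 0xdeadbeef, stored 0xbeef", which is exactly the kind
of silent loss the size switch in the patch is meant to refuse.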
diff -r 4034317507de xen/arch/ia64/vmx/mmio.c
--- a/xen/arch/ia64/vmx/mmio.c Mon Oct 29 16:49:02 2007 +0000
+++ b/xen/arch/ia64/vmx/mmio.c Tue Oct 30 10:03:42 2007 -0600
@@ -55,53 +55,68 @@ static int hvm_buffered_io_intercept(ior
static int hvm_buffered_io_intercept(ioreq_t *p)
{
struct vcpu *v = current;
- spinlock_t *buffered_io_lock;
- buffered_iopage_t *buffered_iopage =
+ buffered_iopage_t *pg =
(buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va);
- unsigned long tmp_write_pointer = 0;
+ buf_ioreq_t bp;
int i;
+ /* Ensure buffered_iopage fits in a page */
+ BUILD_BUG_ON(sizeof(buffered_iopage_t) > PAGE_SIZE);
+
/* ignore READ ioreq_t! */
- if ( p->dir == IOREQ_READ )
- return 0;
-
- for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
- if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
- p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
- hvm_buffered_io_ranges[i]->length )
+ if (p->dir == IOREQ_READ)
+ return 0;
+
+ for (i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++) {
+ if (p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
+ p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
+ hvm_buffered_io_ranges[i]->length)
break;
}
- if ( i == HVM_BUFFERED_IO_RANGE_NR )
- return 0;
-
- buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock;
- spin_lock(buffered_io_lock);
-
- if ( buffered_iopage->write_pointer - buffered_iopage->read_pointer ==
- (unsigned long)IOREQ_BUFFER_SLOT_NUM ) {
+ if (i == HVM_BUFFERED_IO_RANGE_NR)
+ return 0;
+
+ bp.type = p->type;
+ bp.dir = p->dir;
+ switch (p->size) {
+ case 1:
+ bp.size = 0;
+ break;
+ case 2:
+ bp.size = 1;
+ break;
+ default:
+ /* Could use quad word semantics, but it only appears
+ * to be useful for timeoffset data. */
+ return 0;
+ }
+ bp.data = (uint16_t)p->data;
+ bp.addr = (uint32_t)p->addr;
+
+ spin_lock(&v->domain->arch.hvm_domain.buffered_io_lock);
+
+ if (pg->write_pointer - pg->read_pointer == IOREQ_BUFFER_SLOT_NUM) {
/* the queue is full.
* send the iopacket through the normal path.
* NOTE: The arithimetic operation could handle the situation for
* write_pointer overflow.
*/
- spin_unlock(buffered_io_lock);
- return 0;
- }
-
- tmp_write_pointer = buffered_iopage->write_pointer % IOREQ_BUFFER_SLOT_NUM;
-
- memcpy(&buffered_iopage->ioreq[tmp_write_pointer], p, sizeof(ioreq_t));
-
- /*make the ioreq_t visible before write_pointer*/
+ spin_unlock(&v->domain->arch.hvm_domain.buffered_io_lock);
+ return 0;
+ }
+
+ memcpy(&pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM],
+ &bp, sizeof(bp));
+
+ /* Make the ioreq_t visible before write_pointer */
wmb();
- buffered_iopage->write_pointer++;
-
- spin_unlock(buffered_io_lock);
+ pg->write_pointer++;
+
+ spin_unlock(&v->domain->arch.hvm_domain.buffered_io_lock);
return 1;
}
-
static void low_mmio_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
{
@@ -110,32 +125,36 @@ static void low_mmio_access(VCPU *vcpu,
ioreq_t *p;
vio = get_vio(v->domain, v->vcpu_id);
- if (vio == 0) {
- panic_domain(NULL,"bad shared page: %lx", (unsigned long)vio);
- }
+ if (!vio)
+ panic_domain(NULL, "bad shared page");
+
p = &vio->vp_ioreq;
+
p->addr = pa;
p->size = s;
p->count = 1;
+ if (dir == IOREQ_WRITE)
+ p->data = *val;
+ else
+ p->data = 0;
+ p->data_is_ptr = 0;
p->dir = dir;
- if (dir==IOREQ_WRITE) // write;
- p->data = *val;
- else if (dir == IOREQ_READ)
- p->data = 0; // clear all bits
- p->data_is_ptr = 0;
+ p->df = 0;
p->type = 1;
- p->df = 0;
p->io_count++;
+
if (hvm_buffered_io_intercept(p)) {
p->state = STATE_IORESP_READY;
vmx_io_assist(v);
- return;
- } else
- vmx_send_assist_req(v);
- if (dir == IOREQ_READ) { // read
+ if (dir != IOREQ_READ)
+ return;
+ }
+
+ vmx_send_assist_req(v);
+ if (dir == IOREQ_READ)
*val = p->data;
- }
+
return;
}
@@ -227,16 +246,18 @@ static void legacy_io_access(VCPU *vcpu,
ioreq_t *p;
vio = get_vio(v->domain, v->vcpu_id);
- if (vio == 0) {
- panic_domain(NULL,"bad shared page\n");
- }
+ if (!vio)
+ panic_domain(NULL, "bad shared page\n");
+
p = &vio->vp_ioreq;
- p->addr = TO_LEGACY_IO(pa&0x3ffffffUL);
+ p->addr = TO_LEGACY_IO(pa & 0x3ffffffUL);
p->size = s;
p->count = 1;
p->dir = dir;
- if (dir == IOREQ_WRITE) // write;
+ if (dir == IOREQ_WRITE)
p->data = *val;
+ else
+ p->data = 0;
p->data_is_ptr = 0;
p->type = 0;
p->df = 0;
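One more note on the queue-full test the patch keeps: the comment about
write_pointer overflow relies on plain unsigned wraparound. Assuming the
read and write pointers are free-running unsigned 32-bit counters (as
the code above treats them), write_pointer - read_pointer still gives
the number of queued slots even after write_pointer wraps past zero, so
the == IOREQ_BUFFER_SLOT_NUM check keeps working. A minimal
illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Free-running producer/consumer counters near the 32-bit limit. */
    uint32_t read_pointer  = 0xfffffff0u;         /* consumer */
    uint32_t write_pointer = read_pointer + 20u;  /* producer, wrapped past 0 */

    /* Unsigned subtraction wraps the same way, so the queue depth (20)
     * survives the overflow of write_pointer. */
    printf("queued = %u\n", (unsigned)(write_pointer - read_pointer));
    return 0;
}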