WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 2 of 4] Allow xentrace to handle >4G of trace data

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 2 of 4] Allow xentrace to handle >4G of trace data
From: Michael.Fetterman@xxxxxxxxxxxx
Date: Sat, 09 Feb 2008 21:22:28 +0000
Delivery-date: Sat, 09 Feb 2008 13:28:59 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
In-reply-to: <patchbomb.1202592146@xxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Michael.Fetterman@xxxxxxxxxxxx
# Date 1202591469 0
# Node ID 9ff80e7b54528e06c8e0dc0ad8d8da09afca7f16
# Parent  91b4a9b2629c770bafca0d58bb106ce72a3a968f
Allow xentrace to handle >4G of trace data.
It was previously triggering an assertion failure when it hit 4G.

Also, because the trace buffer is not a power of 2 in size,
using modulo arithmetic to address the buffer does not work
when the index wraps around 2^32.

This patch fixes both issues, and as a beneficial side effect, removes all
integer division from the hypervisor side of the trace mechanism.

Signed-off-by: Michael A Fetterman <Michael.Fetterman@xxxxxxxxxxxx>

diff -r 91b4a9b2629c -r 9ff80e7b5452 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Sat Feb 09 21:11:08 2008 +0000
+++ b/tools/xentrace/xentrace.c Sat Feb 09 21:11:09 2008 +0000
@@ -362,9 +362,18 @@ int monitor_tbufs(int outfd)
             if ( cons == prod )
                 continue;
            
-            assert(prod > cons);
-
-            window_size = prod - cons;
+            assert(cons < 2*data_size);
+            assert(prod < 2*data_size);
+
+            // NB: if (prod<cons), then (prod-cons)%data_size will not yield
+            // the correct answer because data_size is not a power of 2.
+            if ( prod < cons )
+                window_size = (prod + 2*data_size) - cons;
+            else
+                window_size = prod - cons;
+            assert(window_size > 0);
+            assert(window_size <= data_size);
+
             start_offset = cons % data_size;
             end_offset = prod % data_size;
 
diff -r 91b4a9b2629c -r 9ff80e7b5452 xen/common/trace.c
--- a/xen/common/trace.c        Sat Feb 09 21:11:08 2008 +0000
+++ b/xen/common/trace.c        Sat Feb 09 21:11:09 2008 +0000
@@ -239,14 +239,46 @@ static inline int calc_rec_size(int cycl
     return rec_size;
 }
 
+static inline int calc_unconsumed_bytes(struct t_buf *buf)
+{
+    int x = buf->prod - buf->cons;
+    if ( x < 0 )
+        x += 2*data_size;
+
+    ASSERT(x >= 0);
+    ASSERT(x <= data_size);
+
+    return x;
+}
+
 static inline int calc_bytes_to_wrap(struct t_buf *buf)
 {
-    return data_size - (buf->prod % data_size);
-}
-
-static inline unsigned calc_bytes_avail(struct t_buf *buf)
-{
-    return data_size - (buf->prod - buf->cons);
+    int x = data_size - buf->prod;
+    if ( x <= 0 )
+        x += data_size;
+
+    ASSERT(x > 0);
+    ASSERT(x <= data_size);
+
+    return x;
+}
+
+static inline int calc_bytes_avail(struct t_buf *buf)
+{
+    return data_size - calc_unconsumed_bytes(buf);
+}
+
+static inline struct t_rec *
+next_record(struct t_buf *buf)
+{
+    int x = buf->prod;
+    if ( x >= data_size )
+        x -= data_size;
+
+    ASSERT(x >= 0);
+    ASSERT(x < data_size);
+
+    return (struct t_rec *)&this_cpu(t_data)[x];
 }
 
 static inline int __insert_record(struct t_buf *buf,
@@ -260,24 +292,25 @@ static inline int __insert_record(struct
     unsigned char *dst;
     unsigned long extra_word = extra/sizeof(u32);
     int local_rec_size = calc_rec_size(cycles, extra);
+    uint32_t next;
 
     BUG_ON(local_rec_size != rec_size);
+    BUG_ON(extra & 3);
 
     /* Double-check once more that we have enough space.
      * Don't bugcheck here, in case the userland tool is doing
      * something stupid. */
     if ( calc_bytes_avail(buf) < rec_size )
     {
-        printk("%s: %u bytes left (%u - (%u - %u)) recsize %u.\n",
+        printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
                __func__,
-               data_size - (buf->prod - buf->cons),
-               data_size,
-               buf->prod, buf->cons, rec_size);
+               calc_bytes_avail(buf),
+               data_size, buf->prod, buf->cons, data_size, rec_size);
         return 0;
     }
     rmb();
 
-    rec = (struct t_rec *)&this_cpu(t_data)[buf->prod % data_size];
+    rec = next_record(buf);
     rec->event = event;
     rec->extra_u32 = extra_word;
     dst = (unsigned char *)rec->u.nocycles.extra_u32;
@@ -293,7 +326,13 @@ static inline int __insert_record(struct
         memcpy(dst, extra_data, extra);
 
     wmb();
-    buf->prod += rec_size;
+
+    next = buf->prod + rec_size;
+    if ( next >= 2*data_size )
+        next -= 2*data_size;
+    ASSERT(next >= 0);
+    ASSERT(next < 2*data_size);
+    buf->prod = next;
 
     return rec_size;
 }
@@ -395,7 +434,7 @@ void __trace_var(u32 event, int cycles, 
 
     local_irq_save(flags);
 
-    started_below_highwater = ((buf->prod - buf->cons) < t_buf_highwater);
+    started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
 
     /* Calculate the record size */
     rec_size = calc_rec_size(cycles, extra);
@@ -413,10 +452,6 @@ void __trace_var(u32 event, int cycles, 
     total_size = 0;
 
     /* First, check to see if we need to include a lost_record.
-     *
-     * calc_bytes_to_wrap() involves integer division, which we'd like to
-     * avoid if we can.  So do the math, check it in debug versions, and
-     * do a final check always if we happen to write a record.
      */
     if ( this_cpu(lost_records) )
     {
@@ -477,7 +512,7 @@ void __trace_var(u32 event, int cycles, 
 
     /* Notify trace buffer consumer that we've crossed the high water mark. */
     if ( started_below_highwater &&
-         ((buf->prod - buf->cons) >= t_buf_highwater) )
+         (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
         raise_softirq(TRACE_SOFTIRQ);
 }
 
diff -r 91b4a9b2629c -r 9ff80e7b5452 xen/include/public/trace.h
--- a/xen/include/public/trace.h        Sat Feb 09 21:11:08 2008 +0000
+++ b/xen/include/public/trace.h        Sat Feb 09 21:11:09 2008 +0000
@@ -221,6 +221,14 @@ struct t_rec {
  * field, indexes into an array of struct t_rec's.
  */
 struct t_buf {
+    /* Assume the data buffer size is X.  X is generally not a power of 2.
+     * CONS and PROD are incremented modulo (2*X):
+     *     0 <= cons < 2*X
+     *     0 <= prod < 2*X
+     * This is done because addition modulo X breaks at 2^32 when X is not a
+     * power of 2:
+     *     (((2^32 - 1) % X) + 1) % X != (2^32) % X
+     */
     uint32_t cons;   /* Offset of next item to be consumed by control tools. */
     uint32_t prod;   /* Offset of next item to be produced by Xen.           */
     /*  Records follow immediately after the meta-data header.    */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel