WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] New VCPUOP_register_runstate_memory_area hypercall. Avoids

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] New VCPUOP_register_runstate_memory_area hypercall. Avoids
From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Sat, 25 Feb 2006 22:48:08 +0000
Delivery-date: Sat, 25 Feb 2006 22:48:28 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID d0b7281556f2e7ffe3e19399cd0906b32212f620
# Parent  c375c210945282c2c5fd6cb86f51422e211ed8a2
New VCPUOP_register_runstate_memory_area hypercall. Avoids
need for a hypercall in the guest timer interrupt handler.

Cleaned up stolen/blocked tick handling in Linux.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>

diff -r c375c2109452 -r d0b7281556f2 linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c  Sat Feb 25 19:07:28 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c  Sat Feb 25 20:28:27 2006
@@ -130,6 +130,9 @@
 static DEFINE_PER_CPU(u64, processed_stolen_time);
 static DEFINE_PER_CPU(u64, processed_blocked_time);
 
+/* Current runstate of each CPU (updated automatically by the hypervisor). */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+
 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
 #define NS_PER_TICK (1000000000LL/HZ)
 
@@ -575,19 +578,36 @@
 irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
        s64 delta, delta_cpu, stolen, blocked;
+       u64 sched_time;
        int i, cpu = smp_processor_id();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
-       struct vcpu_runstate_info runstate;
+       struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
 
        write_seqlock(&xtime_lock);
 
        do {
                get_time_values_from_xen();
 
+               /* Obtain a consistent snapshot of elapsed wallclock cycles. */
                delta = delta_cpu = 
                        shadow->system_timestamp + get_nsec_offset(shadow);
                delta     -= processed_system_time;
                delta_cpu -= per_cpu(processed_system_time, cpu);
+
+               /*
+                * Obtain a consistent snapshot of stolen/blocked cycles. We
+                * can use state_entry_time to detect if we get preempted here.
+                */
+               do {
+                       sched_time = runstate->state_entry_time;
+                       barrier();
+                       stolen = runstate->time[RUNSTATE_runnable] +
+                               runstate->time[RUNSTATE_offline] -
+                               per_cpu(processed_stolen_time, cpu);
+                       blocked = runstate->time[RUNSTATE_blocked] -
+                               per_cpu(processed_blocked_time, cpu);
+                       barrier();
+               } while (sched_time != runstate->state_entry_time);
        }
        while (!time_values_up_to_date(cpu));
 
@@ -619,60 +639,44 @@
 
        write_sequnlock(&xtime_lock);
 
-       /* Obtain stolen/blocked cycles, if the hypervisor supports it. */
-       if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info,
-                              cpu, &runstate) == 0) {
-               /*
-                * Account stolen ticks.
-                * HACK: Passing NULL to account_steal_time()
-                * ensures that the ticks are accounted as stolen.
-                */
-               stolen = runstate.time[RUNSTATE_runnable] +
-                       runstate.time[RUNSTATE_offline] -
-                       per_cpu(processed_stolen_time, cpu);
-               if (unlikely(stolen < 0)) /* clock jitter */
-                       stolen = 0;
+       /*
+        * Account stolen ticks.
+        * HACK: Passing NULL to account_steal_time()
+        * ensures that the ticks are accounted as stolen.
+        */
+       if (stolen > 0) {
                delta_cpu -= stolen;
-               if (unlikely(delta_cpu < 0)) {
-                       stolen += delta_cpu;
-                       delta_cpu = 0;
-               }
                do_div(stolen, NS_PER_TICK);
                per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
+               per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
                account_steal_time(NULL, (cputime_t)stolen);
-
-               /*
-                * Account blocked ticks.
-                * HACK: Passing idle_task to account_steal_time()
-                * ensures that the ticks are accounted as idle/wait.
-                */
-               blocked = runstate.time[RUNSTATE_blocked] -
-                       per_cpu(processed_blocked_time, cpu);
-               if (unlikely(blocked < 0)) /* clock jitter */
-                       blocked = 0;
+       }
+
+       /*
+        * Account blocked ticks.
+        * HACK: Passing idle_task to account_steal_time()
+        * ensures that the ticks are accounted as idle/wait.
+        */
+       if (blocked > 0) {
                delta_cpu -= blocked;
-               if (unlikely(delta_cpu < 0)) {
-                       blocked += delta_cpu;
-                       delta_cpu = 0;
-               }
                do_div(blocked, NS_PER_TICK);
                per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
+               per_cpu(processed_system_time, cpu)  += blocked * NS_PER_TICK;
                account_steal_time(idle_task(cpu), (cputime_t)blocked);
-
-               per_cpu(processed_system_time, cpu) +=
-                       (stolen + blocked) * NS_PER_TICK;
-       }
-
+       }
+
+       /* Account user/system ticks. */
        if (delta_cpu > 0) {
                do_div(delta_cpu, NS_PER_TICK);
+               per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
                if (user_mode(regs))
                        account_user_time(current, (cputime_t)delta_cpu);
                else
                        account_system_time(current, HARDIRQ_OFFSET,
                                            (cputime_t)delta_cpu);
-               per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
-       }
-
+       }
+
+       /* Local timer processing (see update_process_times()). */
        run_local_timers();
        if (rcu_pending(cpu))
                rcu_check_callbacks(cpu, user_mode(regs));
@@ -684,14 +688,19 @@
 
 static void init_missing_ticks_accounting(int cpu)
 {
-       struct vcpu_runstate_info runstate = { 0 };
-
-       HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate);
-
-       per_cpu(processed_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
+       struct vcpu_register_runstate_memory_area area;
+       struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
+
+       memset(runstate, 0, sizeof(*runstate));
+
+       area.addr.v = runstate;
+       HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
+
+       per_cpu(processed_blocked_time, cpu) =
+               runstate->time[RUNSTATE_blocked];
        per_cpu(processed_stolen_time, cpu) =
-               runstate.time[RUNSTATE_runnable] +
-               runstate.time[RUNSTATE_offline];
+               runstate->time[RUNSTATE_runnable] +
+               runstate->time[RUNSTATE_offline];
 }
 
 /* not static: needed by APM */
diff -r c375c2109452 -r d0b7281556f2 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Sat Feb 25 19:07:28 2006
+++ b/xen/arch/x86/domain.c     Sat Feb 25 20:28:27 2006
@@ -784,6 +784,11 @@
 
     context_saved(prev);
 
+    /* Update per-VCPU guest runstate shared memory area (if registered). */
+    if ( next->runstate_guest != NULL )
+        __copy_to_user(next->runstate_guest, &next->runstate,
+                       sizeof(next->runstate));
+
     schedule_tail(next);
     BUG();
 }
diff -r c375c2109452 -r d0b7281556f2 xen/common/domain.c
--- a/xen/common/domain.c       Sat Feb 25 19:07:28 2006
+++ b/xen/common/domain.c       Sat Feb 25 20:28:27 2006
@@ -461,6 +461,28 @@
         break;
     }
 
+    case VCPUOP_register_runstate_memory_area:
+    {
+        struct vcpu_register_runstate_memory_area area;
+
+        rc = -EINVAL;
+        if ( v != current )
+            break;
+
+        rc = -EFAULT;
+        if ( copy_from_user(&area, arg, sizeof(area)) )
+            break;
+
+        if ( !access_ok(area.addr.v, sizeof(*area.addr.v)) )
+            break;
+
+        rc = 0;
+        v->runstate_guest = area.addr.v;
+        __copy_to_user(v->runstate_guest, &v->runstate, sizeof(v->runstate));
+
+        break;
+    }
+
     default:
         rc = -ENOSYS;
         break;
diff -r c375c2109452 -r d0b7281556f2 xen/include/public/vcpu.h
--- a/xen/include/public/vcpu.h Sat Feb 25 19:07:28 2006
+++ b/xen/include/public/vcpu.h Sat Feb 25 20:28:27 2006
@@ -53,7 +53,7 @@
 
 /*
  * Return information about the state and running time of a VCPU.
- * @extra_arg == pointer to xen_vcpu_info structure.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
  */
 #define VCPUOP_get_runstate_info    4
 typedef struct vcpu_runstate_info {
@@ -85,6 +85,27 @@
  */
 #define RUNSTATE_offline  3
 
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address may be virtual or physical, depending on the
+ *     platform. The virtual address should be registered on x86 systems.
+ *  2. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_memory_area 5
+typedef struct vcpu_register_runstate_memory_area {
+    union {
+        struct vcpu_runstate_info *v;
+        uint64_t p;
+    } addr;
+} vcpu_register_runstate_memory_area_t;
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff -r c375c2109452 -r d0b7281556f2 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Sat Feb 25 19:07:28 2006
+++ b/xen/include/xen/sched.h   Sat Feb 25 20:28:27 2006
@@ -70,6 +70,7 @@
     void            *sched_priv;    /* scheduler-specific data */
 
     struct vcpu_runstate_info runstate;
+    struct vcpu_runstate_info *runstate_guest; /* guest address */
 
     unsigned long    vcpu_flags;
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

[Prev in Thread] Current Thread [Next in Thread]
  • [Xen-changelog] New VCPUOP_register_runstate_memory_area hypercall. Avoids, Xen patchbot -unstable <=