# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID d0b7281556f2e7ffe3e19399cd0906b32212f620
# Parent c375c210945282c2c5fd6cb86f51422e211ed8a2
New VCPUOP_register_runstate_memory_area hypercall. Avoids
need for a hypercall in the guest timer interrupt handler.
Cleaned up stolen/blocked tick handling in Linux.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
diff -r c375c2109452 -r d0b7281556f2 linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Sat Feb 25 19:07:28 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Sat Feb 25 20:28:27 2006
@@ -130,6 +130,9 @@
static DEFINE_PER_CPU(u64, processed_stolen_time);
static DEFINE_PER_CPU(u64, processed_blocked_time);
+/* Current runstate of each CPU (updated automatically by the hypervisor). */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+
/* Must be signed, as it's compared with s64 quantities which can be -ve. */
#define NS_PER_TICK (1000000000LL/HZ)
@@ -575,19 +578,36 @@
irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
s64 delta, delta_cpu, stolen, blocked;
+ u64 sched_time;
int i, cpu = smp_processor_id();
struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
- struct vcpu_runstate_info runstate;
+ struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
write_seqlock(&xtime_lock);
do {
get_time_values_from_xen();
+ /* Obtain a consistent snapshot of elapsed wallclock cycles. */
delta = delta_cpu =
shadow->system_timestamp + get_nsec_offset(shadow);
delta -= processed_system_time;
delta_cpu -= per_cpu(processed_system_time, cpu);
+
+ /*
+ * Obtain a consistent snapshot of stolen/blocked cycles. We
+ * can use state_entry_time to detect if we get preempted here.
+ */
+ do {
+ sched_time = runstate->state_entry_time;
+ barrier();
+ stolen = runstate->time[RUNSTATE_runnable] +
+ runstate->time[RUNSTATE_offline] -
+ per_cpu(processed_stolen_time, cpu);
+ blocked = runstate->time[RUNSTATE_blocked] -
+ per_cpu(processed_blocked_time, cpu);
+ barrier();
+ } while (sched_time != runstate->state_entry_time);
}
while (!time_values_up_to_date(cpu));
@@ -619,60 +639,44 @@
write_sequnlock(&xtime_lock);
- /* Obtain stolen/blocked cycles, if the hypervisor supports it. */
- if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info,
- cpu, &runstate) == 0) {
- /*
- * Account stolen ticks.
- * HACK: Passing NULL to account_steal_time()
- * ensures that the ticks are accounted as stolen.
- */
- stolen = runstate.time[RUNSTATE_runnable] +
- runstate.time[RUNSTATE_offline] -
- per_cpu(processed_stolen_time, cpu);
- if (unlikely(stolen < 0)) /* clock jitter */
- stolen = 0;
+ /*
+ * Account stolen ticks.
+ * HACK: Passing NULL to account_steal_time()
+ * ensures that the ticks are accounted as stolen.
+ */
+ if (stolen > 0) {
delta_cpu -= stolen;
- if (unlikely(delta_cpu < 0)) {
- stolen += delta_cpu;
- delta_cpu = 0;
- }
do_div(stolen, NS_PER_TICK);
per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
+ per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
account_steal_time(NULL, (cputime_t)stolen);
-
- /*
- * Account blocked ticks.
- * HACK: Passing idle_task to account_steal_time()
- * ensures that the ticks are accounted as idle/wait.
- */
- blocked = runstate.time[RUNSTATE_blocked] -
- per_cpu(processed_blocked_time, cpu);
- if (unlikely(blocked < 0)) /* clock jitter */
- blocked = 0;
+ }
+
+ /*
+ * Account blocked ticks.
+ * HACK: Passing idle_task to account_steal_time()
+ * ensures that the ticks are accounted as idle/wait.
+ */
+ if (blocked > 0) {
delta_cpu -= blocked;
- if (unlikely(delta_cpu < 0)) {
- blocked += delta_cpu;
- delta_cpu = 0;
- }
do_div(blocked, NS_PER_TICK);
per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
+ per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK;
account_steal_time(idle_task(cpu), (cputime_t)blocked);
-
- per_cpu(processed_system_time, cpu) +=
- (stolen + blocked) * NS_PER_TICK;
- }
-
+ }
+
+ /* Account user/system ticks. */
if (delta_cpu > 0) {
do_div(delta_cpu, NS_PER_TICK);
+ per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
if (user_mode(regs))
account_user_time(current, (cputime_t)delta_cpu);
else
account_system_time(current, HARDIRQ_OFFSET,
(cputime_t)delta_cpu);
- per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
- }
-
+ }
+
+ /* Local timer processing (see update_process_times()). */
run_local_timers();
if (rcu_pending(cpu))
rcu_check_callbacks(cpu, user_mode(regs));
@@ -684,14 +688,19 @@
static void init_missing_ticks_accounting(int cpu)
{
- struct vcpu_runstate_info runstate = { 0 };
-
- HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate);
-
- per_cpu(processed_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
+ struct vcpu_register_runstate_memory_area area;
+ struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
+
+ memset(runstate, 0, sizeof(*runstate));
+
+ area.addr.v = runstate;
+ HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
+
+ per_cpu(processed_blocked_time, cpu) =
+ runstate->time[RUNSTATE_blocked];
per_cpu(processed_stolen_time, cpu) =
- runstate.time[RUNSTATE_runnable] +
- runstate.time[RUNSTATE_offline];
+ runstate->time[RUNSTATE_runnable] +
+ runstate->time[RUNSTATE_offline];
}
/* not static: needed by APM */
diff -r c375c2109452 -r d0b7281556f2 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Sat Feb 25 19:07:28 2006
+++ b/xen/arch/x86/domain.c Sat Feb 25 20:28:27 2006
@@ -784,6 +784,11 @@
context_saved(prev);
+ /* Update per-VCPU guest runstate shared memory area (if registered). */
+ if ( next->runstate_guest != NULL )
+ __copy_to_user(next->runstate_guest, &next->runstate,
+ sizeof(next->runstate));
+
schedule_tail(next);
BUG();
}
diff -r c375c2109452 -r d0b7281556f2 xen/common/domain.c
--- a/xen/common/domain.c Sat Feb 25 19:07:28 2006
+++ b/xen/common/domain.c Sat Feb 25 20:28:27 2006
@@ -461,6 +461,28 @@
break;
}
+ case VCPUOP_register_runstate_memory_area:
+ {
+ struct vcpu_register_runstate_memory_area area;
+
+ rc = -EINVAL;
+ if ( v != current )
+ break;
+
+ rc = -EFAULT;
+ if ( copy_from_user(&area, arg, sizeof(area)) )
+ break;
+
+ if ( !access_ok(area.addr.v, sizeof(*area.addr.v)) )
+ break;
+
+ rc = 0;
+ v->runstate_guest = area.addr.v;
+ __copy_to_user(v->runstate_guest, &v->runstate, sizeof(v->runstate));
+
+ break;
+ }
+
default:
rc = -ENOSYS;
break;
diff -r c375c2109452 -r d0b7281556f2 xen/include/public/vcpu.h
--- a/xen/include/public/vcpu.h Sat Feb 25 19:07:28 2006
+++ b/xen/include/public/vcpu.h Sat Feb 25 20:28:27 2006
@@ -53,7 +53,7 @@
/*
* Return information about the state and running time of a VCPU.
- * @extra_arg == pointer to xen_vcpu_info structure.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
*/
#define VCPUOP_get_runstate_info 4
typedef struct vcpu_runstate_info {
@@ -85,6 +85,27 @@
*/
#define RUNSTATE_offline 3
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ * 1. The registered address may be virtual or physical, depending on the
+ * platform. The virtual address should be registered on x86 systems.
+ * 2. Only one shared area may be registered per VCPU. The shared area is
+ * updated by the hypervisor each time the VCPU is scheduled. Thus
+ * runstate.state will always be RUNSTATE_running and
+ * runstate.state_entry_time will indicate the system time at which the
+ * VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_memory_area 5
+typedef struct vcpu_register_runstate_memory_area {
+ union {
+ struct vcpu_runstate_info *v;
+ uint64_t p;
+ } addr;
+} vcpu_register_runstate_memory_area_t;
+
#endif /* __XEN_PUBLIC_VCPU_H__ */
/*
diff -r c375c2109452 -r d0b7281556f2 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Sat Feb 25 19:07:28 2006
+++ b/xen/include/xen/sched.h Sat Feb 25 20:28:27 2006
@@ -70,6 +70,7 @@
void *sched_priv; /* scheduler-specific data */
struct vcpu_runstate_info runstate;
+ struct vcpu_runstate_info *runstate_guest; /* guest address */
unsigned long vcpu_flags;
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog