# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 2303fb4682e7cd4feb330fd2aec69672facb4ec6
# Parent a9f3abcc41499b7be971412d66c08d0e9740ff66
New VCPUOP_get_runstate_info hypercall. Returns information about the current
run state of a VCPU (running, runnable, blocked, etc.) and the total time
spent in each state since the VCPU was created.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
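As a usage illustration (not part of this changeset): a guest kernel could query one of
its VCPUs' runstate through the new hypercall roughly as sketched below. The header path
and the HYPERVISOR_vcpu_op()/printk() wrappers follow Linux-style guest ports and are
assumptions here, not something this patch provides.

    #include <xen/interface/vcpu.h>  /* vcpu_runstate_info, VCPUOP_*, RUNSTATE_* */

    static void report_runstate(unsigned int vcpu_id)
    {
        struct vcpu_runstate_info info;

        /* Ask Xen to copy the runstate record for 'vcpu_id' into 'info'. */
        if ( HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, vcpu_id, &info) != 0 )
            return;

        printk("vcpu%u: state=%d running=%lluns runnable=%lluns "
               "blocked=%lluns offline=%lluns\n",
               vcpu_id, info.state,
               (unsigned long long)info.time[RUNSTATE_running],
               (unsigned long long)info.time[RUNSTATE_runnable],
               (unsigned long long)info.time[RUNSTATE_blocked],
               (unsigned long long)info.time[RUNSTATE_offline]);
    }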
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c Sat Feb 25 11:27:53 2006
+++ b/xen/common/dom0_ops.c Sat Feb 25 16:58:37 2006
@@ -46,6 +46,7 @@
struct vcpu *v;
u64 cpu_time = 0;
int flags = DOMFLAGS_BLOCKED;
+ struct vcpu_runstate_info runstate;
info->domain = d->domain_id;
info->nr_online_vcpus = 0;
@@ -55,7 +56,8 @@
* - domain is marked as running if any of its vcpus is running
*/
for_each_vcpu ( d, v ) {
- cpu_time += v->cpu_time;
+ vcpu_runstate_get(v, &runstate);
+ cpu_time += runstate.time[RUNSTATE_running];
info->max_vcpu_id = v->vcpu_id;
if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
{
@@ -497,6 +499,7 @@
{
struct domain *d;
struct vcpu *v;
+ struct vcpu_runstate_info runstate;
ret = -ESRCH;
if ( (d = find_domain_by_id(op->u.getvcpuinfo.domain)) == NULL )
@@ -510,10 +513,12 @@
if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
goto getvcpuinfo_out;
+ vcpu_runstate_get(v, &runstate);
+
op->u.getvcpuinfo.online = !test_bit(_VCPUF_down, &v->vcpu_flags);
op->u.getvcpuinfo.blocked = test_bit(_VCPUF_blocked, &v->vcpu_flags);
op->u.getvcpuinfo.running = test_bit(_VCPUF_running, &v->vcpu_flags);
- op->u.getvcpuinfo.cpu_time = v->cpu_time;
+ op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
op->u.getvcpuinfo.cpu = v->processor;
op->u.getvcpuinfo.cpumap = 0;
memcpy(&op->u.getvcpuinfo.cpumap,
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/domain.c
--- a/xen/common/domain.c Sat Feb 25 11:27:53 2006
+++ b/xen/common/domain.c Sat Feb 25 16:58:37 2006
@@ -451,6 +451,19 @@
case VCPUOP_is_up:
rc = !test_bit(_VCPUF_down, &v->vcpu_flags);
break;
+
+ case VCPUOP_get_runstate_info:
+ {
+ struct vcpu_runstate_info runstate;
+ vcpu_runstate_get(v, &runstate);
+ if ( copy_to_user(arg, &runstate, sizeof(runstate)) )
+ rc = -EFAULT;
+ break;
+ }
+
+ default:
+ rc = -ENOSYS;
+ break;
}
return rc;
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c Sat Feb 25 11:27:53 2006
+++ b/xen/common/keyhandler.c Sat Feb 25 16:58:37 2006
@@ -169,8 +169,6 @@
}
extern void dump_runq(unsigned char key);
-extern void print_sched_histo(unsigned char key);
-extern void reset_sched_histo(unsigned char key);
#ifndef NDEBUG
extern void audit_domains_key(unsigned char key);
#endif
@@ -206,10 +204,6 @@
'd', dump_registers, "dump registers");
register_keyhandler(
'h', show_handlers, "show this message");
- register_keyhandler(
- 'l', print_sched_histo, "print sched latency histogram");
- register_keyhandler(
- 'L', reset_sched_histo, "reset sched latency histogram");
register_keyhandler(
'q', dump_domains, "dump domain (and guest debug) info");
register_keyhandler(
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c Sat Feb 25 11:27:53 2006
+++ b/xen/common/sched_bvt.c Sat Feb 25 16:58:37 2006
@@ -132,13 +132,13 @@
vcpu_schedule_unlock_irq(v);
}
-static inline u32 calc_avt(struct vcpu *d, s_time_t now)
+static inline u32 calc_avt(struct vcpu *v, s_time_t now)
{
u32 ranfor, mcus;
- struct bvt_dom_info *inf = BVT_INFO(d->domain);
- struct bvt_vcpu_info *einf = EBVT_INFO(d);
-
- ranfor = (u32)(now - d->lastschd);
+ struct bvt_dom_info *inf = BVT_INFO(v->domain);
+ struct bvt_vcpu_info *einf = EBVT_INFO(v);
+
+ ranfor = (u32)(now - v->runstate.state_entry_time);
mcus = (ranfor + MCU - 1)/MCU;
return einf->avt + mcus * inf->mcu_advance;
@@ -262,7 +262,7 @@
curr_evt = calc_evt(curr, calc_avt(curr, now));
/* Calculate the time the current domain would run assuming
the second smallest evt is of the newly woken domain */
- r_time = curr->lastschd +
+ r_time = curr->runstate.state_entry_time +
((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
ctx_allow;
@@ -558,7 +558,6 @@
printk("%3d: %u has=%c ", loop++, v->domain->domain_id,
test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F');
bvt_dump_runq_el(v);
- printk("c=0x%X%08X\n", (u32)(v->cpu_time>>32), (u32)v->cpu_time);
printk(" l: %p n: %p p: %p\n",
&vcpu_inf->run_list, vcpu_inf->run_list.next,
vcpu_inf->run_list.prev);
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Sat Feb 25 11:27:53 2006
+++ b/xen/common/sched_sedf.c Sat Feb 25 16:58:37 2006
@@ -1408,18 +1408,14 @@
{
printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
- printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64
+ printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
- EDOM_INFO(d)->weight, d->cpu_time,
+ EDOM_INFO(d)->weight,
EDOM_INFO(d)->score[EXTRA_UTIL_Q],
(EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
- if ( d->cpu_time != 0 )
- printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
- / d->cpu_time);
-
#ifdef SEDF_STATS
if ( EDOM_INFO(d)->block_time_tot != 0 )
printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/schedule.c
--- a/xen/common/schedule.c Sat Feb 25 11:27:53 2006
+++ b/xen/common/schedule.c Sat Feb 25 16:58:37 2006
@@ -36,14 +36,6 @@
static char opt_sched[10] = "sedf";
string_param("sched", opt_sched);
-/*#define WAKE_HISTO*/
-/*#define BLOCKTIME_HISTO*/
-#if defined(WAKE_HISTO)
-#define BUCKETS 31
-#elif defined(BLOCKTIME_HISTO)
-#define BUCKETS 200
-#endif
-
#define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */
/* Various timer handlers. */
@@ -73,6 +65,36 @@
/* Per-CPU periodic timer sends an event to the currently-executing domain. */
static struct timer t_timer[NR_CPUS];
+static inline void vcpu_runstate_change(
+ struct vcpu *v, int new_state, s_time_t new_entry_time)
+{
+ ASSERT(v->runstate.state != new_state);
+ ASSERT(spin_is_locked(&schedule_data[v->processor].schedule_lock));
+
+ v->runstate.time[v->runstate.state] +=
+ new_entry_time - v->runstate.state_entry_time;
+ v->runstate.state_entry_time = new_entry_time;
+ v->runstate.state = new_state;
+}
+
+void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
+{
+ if ( likely(v == current) )
+ {
+ /* Fast lock-free path. */
+ memcpy(runstate, &v->runstate, sizeof(*runstate));
+ ASSERT(runstate->state == RUNSTATE_running);
+ runstate->time[RUNSTATE_running] += NOW() - runstate->state_entry_time;
+ }
+ else
+ {
+ vcpu_schedule_lock_irq(v);
+ memcpy(runstate, &v->runstate, sizeof(*runstate));
+ runstate->time[runstate->state] += NOW() - runstate->state_entry_time;
+ vcpu_schedule_unlock_irq(v);
+ }
+}
+
struct domain *alloc_domain(void)
{
struct domain *d;
@@ -119,6 +141,9 @@
v->cpu_affinity = is_idle_domain(d) ?
cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
+ v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
+ v->runstate.state_entry_time = NOW();
+
if ( (vcpu_id != 0) && !is_idle_domain(d) )
set_bit(_VCPUF_down, &v->vcpu_flags);
@@ -165,8 +190,15 @@
unsigned long flags;
vcpu_schedule_lock_irqsave(v, flags);
+
if ( likely(!vcpu_runnable(v)) )
+ {
+ if ( v->runstate.state == RUNSTATE_runnable )
+ vcpu_runstate_change(v, RUNSTATE_offline, NOW());
+
SCHED_OP(sleep, v);
+ }
+
vcpu_schedule_unlock_irqrestore(v, flags);
TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
@@ -187,11 +219,19 @@
unsigned long flags;
vcpu_schedule_lock_irqsave(v, flags);
+
if ( likely(vcpu_runnable(v)) )
{
+ if ( v->runstate.state >= RUNSTATE_blocked )
+ vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
SCHED_OP(wake, v);
- v->wokenup = NOW();
- }
+ }
+ else if ( !test_bit(_VCPUF_blocked, &v->vcpu_flags) )
+ {
+ if ( v->runstate.state == RUNSTATE_blocked )
+ vcpu_runstate_change(v, RUNSTATE_offline, NOW());
+ }
+
vcpu_schedule_unlock_irqrestore(v, flags);
TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
@@ -376,8 +416,6 @@
stop_timer(&schedule_data[cpu].s_timer);
- prev->cpu_time += now - prev->lastschd;
-
/* get policy-specific decision on scheduling... */
next_slice = ops.do_schedule(now);
@@ -386,8 +424,6 @@
schedule_data[cpu].curr = next;
- next->lastschd = now;
-
set_timer(&schedule_data[cpu].s_timer, now + r_time);
if ( unlikely(prev == next) )
@@ -397,38 +433,23 @@
}
TRACE_2D(TRC_SCHED_SWITCH_INFPREV,
- prev->domain->domain_id, now - prev->lastschd);
+ prev->domain->domain_id,
+ now - prev->runstate.state_entry_time);
TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
- next->domain->domain_id, now - next->wokenup, r_time);
-
- /*
- * Logic of wokenup field in domain struct:
- * Used to calculate "waiting time", which is the time that a domain
- * spends being "runnable", but not actually running. wokenup is set
- * set whenever a domain wakes from sleeping. However, if wokenup is not
- * also set here then a preempted runnable domain will get a screwed up
- * "waiting time" value next time it is scheduled.
- */
- prev->wokenup = now;
-
-#if defined(WAKE_HISTO)
- if ( !is_idle_vcpu(next) && next->wokenup )
- {
- ulong diff = (ulong)(now - next->wokenup);
- diff /= (ulong)MILLISECS(1);
- if (diff <= BUCKETS-2) schedule_data[cpu].hist[diff]++;
- else schedule_data[cpu].hist[BUCKETS-1]++;
- }
- next->wokenup = (s_time_t)0;
-#elif defined(BLOCKTIME_HISTO)
- prev->lastdeschd = now;
- if ( !is_idle_vcpu(next) )
- {
- ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
- if (diff <= BUCKETS-2) schedule_data[cpu].hist[diff]++;
- else schedule_data[cpu].hist[BUCKETS-1]++;
- }
-#endif
+ next->domain->domain_id,
+ (next->runstate.state == RUNSTATE_runnable) ?
+ (now - next->runstate.state_entry_time) : 0,
+ r_time);
+
+ ASSERT(prev->runstate.state == RUNSTATE_running);
+ vcpu_runstate_change(
+ prev,
+ (test_bit(_VCPUF_blocked, &prev->vcpu_flags) ? RUNSTATE_blocked :
+ (vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
+ now);
+
+ ASSERT(next->runstate.state != RUNSTATE_running);
+ vcpu_runstate_change(next, RUNSTATE_running, now);
ASSERT(!test_bit(_VCPUF_running, &next->vcpu_flags));
set_bit(_VCPUF_running, &next->vcpu_flags);
@@ -567,47 +588,6 @@
local_irq_restore(flags);
}
-
-#if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
-
-void print_sched_histo(unsigned char key)
-{
- int i, j, k;
- for_each_online_cpu ( k )
- {
- j = 0;
- printf ("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
- for ( i = 0; i < BUCKETS; i++ )
- {
- if ( schedule_data[k].hist[i] != 0 )
- {
- if ( i < BUCKETS-1 )
- printk("%2d:[%7u] ", i, schedule_data[k].hist[i]);
- else
- printk(" >:[%7u] ", schedule_data[k].hist[i]);
- if ( !(++j % 5) )
- printk("\n");
- }
- }
- printk("\n");
- }
-
-}
-
-void reset_sched_histo(unsigned char key)
-{
- int i, j;
- for ( j = 0; j < NR_CPUS; j++ )
- for ( i=0; i < BUCKETS; i++ )
- schedule_data[j].hist[i] = 0;
-}
-
-#else
-
-void print_sched_histo(unsigned char key) { }
-void reset_sched_histo(unsigned char key) { }
-
-#endif
/*
* Local variables:
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/public/vcpu.h
--- a/xen/include/public/vcpu.h Sat Feb 25 11:27:53 2006
+++ b/xen/include/public/vcpu.h Sat Feb 25 16:58:37 2006
@@ -51,6 +51,40 @@
/* Returns 1 if the given VCPU is up. */
#define VCPUOP_is_up 3
+/*
+ * Return information about the state and running time of a VCPU.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
+ */
+#define VCPUOP_get_runstate_info 4
+typedef struct vcpu_runstate_info {
+ /* VCPU's current state (RUNSTATE_*). */
+ int state;
+ /* When was current state entered (system time, ns)? */
+ uint64_t state_entry_time;
+ /*
+ * Time spent in each RUNSTATE_* (ns). The sum of these times is
+ * guaranteed not to drift from system time.
+ */
+ uint64_t time[4];
+} vcpu_runstate_info_t;
+
+/* VCPU is currently running on a physical CPU. */
+#define RUNSTATE_running 0
+
+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
+#define RUNSTATE_runnable 1
+
+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
+#define RUNSTATE_blocked 2
+
+/*
+ * VCPU is not runnable, but it is not blocked.
+ * This is a 'catch all' state for things like hotplug and pauses by the
+ * system administrator (or for critical sections in the hypervisor).
+ * RUNSTATE_blocked dominates this state (it is the preferred state).
+ */
+#define RUNSTATE_offline 3
+
#endif /* __XEN_PUBLIC_VCPU_H__ */
/*
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h Sat Feb 25 11:27:53 2006
+++ b/xen/include/xen/sched-if.h Sat Feb 25 16:58:37 2006
@@ -8,9 +8,6 @@
#ifndef __XEN_SCHED_IF_H__
#define __XEN_SCHED_IF_H__
-#define BUCKETS 10
-/*300*/
-
struct schedule_data {
spinlock_t schedule_lock; /* spinlock protecting curr */
struct vcpu *curr; /* current task */
@@ -18,9 +15,6 @@
void *sched_priv;
struct timer s_timer; /* scheduling timer */
unsigned long tick; /* current periodic 'tick' */
-#ifdef BUCKETS
- u32 hist[BUCKETS]; /* for scheduler latency histogram */
-#endif
} __cacheline_aligned;
extern struct schedule_data schedule_data[];
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Sat Feb 25 11:27:53 2006
+++ b/xen/include/xen/sched.h Sat Feb 25 16:58:37 2006
@@ -8,6 +8,7 @@
#include <xen/smp.h>
#include <public/xen.h>
#include <public/dom0_ops.h>
+#include <public/vcpu.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/grant_table.h>
@@ -63,14 +64,12 @@
struct vcpu *next_in_list;
- struct timer timer; /* one-shot timer for timeout values */
+ struct timer timer; /* one-shot timer for timeout values */
unsigned long sleep_tick; /* tick at which this vcpu started sleep */
- s_time_t lastschd; /* time this domain was last scheduled */
- s_time_t lastdeschd; /* time this domain was last descheduled */
- s_time_t cpu_time; /* total CPU time received till now */
- s_time_t wokenup; /* time domain got woken up */
void *sched_priv; /* scheduler-specific data */
+
+ struct vcpu_runstate_info runstate;
unsigned long vcpu_flags;
@@ -397,7 +396,6 @@
#define _DOMF_debugging 4
#define DOMF_debugging (1UL<<_DOMF_debugging)
-
static inline int vcpu_runnable(struct vcpu *v)
{
return ( (atomic_read(&v->pausecnt) == 0) &&
@@ -415,6 +413,8 @@
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
+void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
+
static inline void vcpu_unblock(struct vcpu *v)
{
if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
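A closing usage note (again not part of the changeset): because the per-state times are
guaranteed not to drift from system time, the difference between two samples of the same
VCPU gives its CPU utilisation over the sampling interval. A minimal sketch; the helper
name and the two-sample scheme are illustrative only.

    #include <stdint.h>

    /* 'prev' and 'curr' are two vcpu_runstate_info samples of the same VCPU,
     * 'prev' taken first. vcpu_runstate_get() credits the still-open interval,
     * so the four per-state deltas sum to the wall-clock ns between samples. */
    static unsigned int runstate_util_percent(const struct vcpu_runstate_info *prev,
                                              const struct vcpu_runstate_info *curr)
    {
        uint64_t ran = curr->time[RUNSTATE_running] - prev->time[RUNSTATE_running];
        uint64_t total = 0;
        int i;

        for ( i = 0; i < 4; i++ )
            total += curr->time[i] - prev->time[i];

        return (total != 0) ? (unsigned int)((ran * 100) / total) : 0;
    }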