This patch performs the bulk of the changes described in the 0/2 description above, to improve HPET accuracy.
Signed-off-by: Dave Winchell <dwinchell@xxxxxxxxxxxxxxx>
Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>
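
A note for reviewers on the conversion arithmetic: every tick<->ns
conversion below funnels through the overflow-safe scaling helper
hpet_mult_div() that this patch adds to sched.h. The standalone sketch
below (not part of the patch; mult_div is a local copy and the values
are illustrative) shows the computation and why the naive product would
overflow:

#include <stdint.h>
#include <stdio.h>

/* Local copy of the patch's hpet_mult_div(): computes (var*num)/den
 * without letting var*num overflow 64 bits, by splitting var into its
 * quotient and remainder with respect to den. */
static uint64_t mult_div(uint64_t var, uint64_t num, uint64_t den)
{
    uint64_t q = var / den;
    uint64_t r = var % den;
    return (q * num) + (r * num) / den;
}

int main(void)
{
    uint64_t period_fs = 0x429b17fULL; /* HPET_PERIOD_SIMULATED: fs per tick */
    uint64_t ticks = 1ULL << 40;       /* a large main counter value */

    /* ticks -> ns, as hpet_tick_to_ns() does: ticks * fs_per_tick / 10^6.
     * The naive product ticks * period_fs would overflow 64 bits for any
     * counter value above roughly 2^38. */
    uint64_t ns = mult_div(ticks, period_fs, 1000000ULL);

    /* ns -> ticks, as hpet_phys_ns_to_ticks() does. */
    uint64_t back = mult_div(ns, 1000000ULL, period_fs);

    printf("ticks %llu -> ns %llu -> ticks %llu\n",
           (unsigned long long)ticks, (unsigned long long)ns,
           (unsigned long long)back);
    return 0;
}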
diff -r ec3493b63170 xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -24,14 +24,11 @@
#include <xen/sched.h>
#include <xen/event.h>
+
#define HPET_BASE_ADDRESS 0xfed00000ULL
#define HPET_MMAP_SIZE 1024
#define S_TO_NS 1000000000ULL /* 1s = 10^9 ns */
#define S_TO_FS 1000000000000000ULL /* 1s = 10^15 fs */
-
-/* Frequency_of_Xen_systeme_time / frequency_of_HPET = 16 */
-#define STIME_PER_HPET_TICK 16
-#define guest_time_hpet(v) (hvm_get_guest_time(v) / STIME_PER_HPET_TICK)
#define HPET_ID 0x000
#define HPET_PERIOD 0x004
@@ -72,8 +69,9 @@
<< HPET_TN_INT_ROUTE_CAP_SHIFT)
#define hpet_tick_to_ns(h, tick) \
- ((s_time_t)((((tick) > (h)->hpet_to_ns_limit) ? \
- ~0ULL : (tick) * (h)->hpet_to_ns_scale) >> 10))
+ (s_time_t)hpet_mult_div(tick, h->hpet.phys_period, 1000000UL)
+
+#define hpet_phys_ns_to_ticks(ns, period) hpet_mult_div(ns, 1000000UL, period)
#define timer_config(h, n) (h->hpet.timers[n].config)
#define timer_is_periodic(h, n) (timer_config(h, n) & HPET_TN_PERIODIC)
@@ -139,15 +137,34 @@
return 0;
}
-
static inline uint64_t hpet_read_maincounter(HPETState *h)
{
- ASSERT(spin_is_locked(&h->lock));
+ uint64_t mc;
- if ( hpet_enabled(h) )
- return guest_time_hpet(h->vcpu) + h->mc_offset;
- else
- return h->hpet.mc64;
+ mc = read_64_main_counter() + h->mc_offset;
+ return mc;
+}
+static inline uint64_t hpet_compute_diff(HPETState *h, int tn)
+{
+
+ if ( timer_is_32bit(h, tn) ) {
+ uint32_t tn_cmp, diff, mc;
+
+ tn_cmp = (uint32_t)h->hpet.timers[tn].cmp;
+ mc = (uint32_t)hpet_read_maincounter(h);
+ diff = tn_cmp - mc;
+ diff = (int32_t)diff > 0 ? diff : (uint32_t)0;
+ return (uint64_t)diff;
+ }
+ else {
+ uint64_t tn_cmp, diff, mc;
+
+ mc = hpet_read_maincounter(h);
+ tn_cmp = h->hpet.timers[tn].cmp;
+ diff = tn_cmp - mc;
+ diff = (int64_t)diff > 0 ? diff : (uint64_t)0;
+ return diff;
+ }
}
static int hpet_read(
@@ -190,13 +207,9 @@
stop_timer(&h->timers[tn]);
}
-/* the number of HPET tick that stands for
- * 1/(2^10) second, namely, 0.9765625 milliseconds */
-#define HPET_TINY_TIME_SPAN ((h->stime_freq >> 10) / STIME_PER_HPET_TICK)
-
static void hpet_set_timer(HPETState *h, unsigned int tn)
{
- uint64_t tn_cmp, cur_tick, diff;
+ uint64_t diff;
ASSERT(tn < HPET_TIMER_NUM);
ASSERT(spin_is_locked(&h->lock));
@@ -209,25 +222,7 @@
pit_stop_channel0_irq(pit);
}
- tn_cmp = h->hpet.timers[tn].cmp;
- cur_tick = hpet_read_maincounter(h);
- if ( timer_is_32bit(h, tn) )
- {
- tn_cmp = (uint32_t)tn_cmp;
- cur_tick = (uint32_t)cur_tick;
- }
-
- diff = tn_cmp - cur_tick;
-
- /*
- * Detect time values set in the past. This is hard to do for 32-bit
- * comparators as the timer does not have to be set that far in the future
- * for the counter difference to wrap a 32-bit signed integer. We fudge
- * by looking for a 'small' time value in the past.
- */
- if ( (int64_t)diff < 0 )
- diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
- ? (uint32_t)diff : 0;
+ diff = hpet_compute_diff(h, tn);
set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
}
@@ -273,14 +268,15 @@
if ( !(old_val & HPET_CFG_ENABLE) && (new_val & HPET_CFG_ENABLE) )
{
/* Enable main counter and interrupt generation. */
- h->mc_offset = h->hpet.mc64 - guest_time_hpet(h->vcpu);
+
+ h->mc_offset = h->hpet.mc64 - read_64_main_counter();
+
for ( i = 0; i < HPET_TIMER_NUM; i++ )
hpet_set_timer(h, i);
}
else if ( (old_val & HPET_CFG_ENABLE) && !(new_val & HPET_CFG_ENABLE) )
{
/* Halt main counter and disable interrupt generation. */
- h->hpet.mc64 = h->mc_offset + guest_time_hpet(h->vcpu);
for ( i = 0; i < HPET_TIMER_NUM; i++ )
hpet_stop_timer(h, i);
}
@@ -291,6 +287,9 @@
gdprintk(XENLOG_WARNING,
"HPET: writing main counter but it's not halted!\n");
h->hpet.mc64 = new_val;
+
+ h->mc_offset = h->hpet.mc64 - read_64_main_counter();
+
break;
case HPET_T0_CFG:
@@ -333,7 +332,7 @@
* - maximum is to prevent overflow in time_after() calculations
*/
if ( hpet_tick_to_ns(h, new_val) < MICROSECS(900) )
- new_val = (MICROSECS(900) << 10) / h->hpet_to_ns_scale;
+ new_val = hpet_phys_ns_to_ticks(MICROSECS(900), h->hpet.phys_period);
new_val &= (timer_is_32bit(h, tn) ? ~0u : ~0ull) >> 1;
h->hpet.period[tn] = new_val;
}
@@ -373,10 +372,216 @@
.write_handler = hpet_write
};
+static void hpet_stats_dump_dom(struct domain *d)
+{
+ struct HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+ unsigned long mc, s;
+ int i;
+
+ printk("domain %d\n", d->domain_id);
+ mc = hpet_read_maincounter(h);
+ s = hpet_tick_to_ns(h, mc);
+ s = s / 1000000000UL;
+
+ printk("cur index %ld\n", s % INTR_CNT_BUCKETS);
+ for(i = 0; i < INTR_CNT_BUCKETS; i++) {
+ if(!(i%10))
+ printk("\n");
+ printk("%ld ", h->hpet.intr_counts[i]);
+ }
+ printk("\n");
+}
+static void hpet_state_dump(struct domain *d)
+{
+ struct HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+
+ printk("timers.config: 0x%lx 0x%lx 0x%lx\n", h->hpet.timers[0].config,
h->hpet.timers[1].config, h->hpet.timers[2].config);
+ printk("timers.cmp: 0x%lx 0x%lx 0x%lx\n", h->hpet.timers[0].cmp,
h->hpet.timers[1].cmp, h->hpet.timers[2].cmp);
+ printk("current mc: 0x%lx\n", hpet_read_maincounter(h));
+ printk("period: %lx %lx %lx\n", h->hpet.period[0], h->hpet.period[1],
h->hpet.period[1]);
+ printk("mc_offset 0x%lx\n",h->mc_offset);
+ printk("phys_period 0x%lx\n",h->hpet.phys_period);
+ printk("last_end_of_intr_mc 0x%lx\n",h->hpet.last_end_of_intr_mc);
+ printk("end_of_intr_mc 0x%lx\n",h->hpet.end_of_intr_mc);
+ printk("cpu_khz 0x%lx\n",h->hpet.cpu_khz);
+ printk("migr_local_tsc 0x%lx\n",h->hpet.migr_local_tsc);
+ printk("intr_pending_nr 0x%lx\n",h->hpet.intr_pending_nr);
+ printk("pending_mask 0x%lx\n",h->hpet.pending_mask);
+ printk("delivery_policy %d\n",h->hpet.delivery_policy);
+ printk("vector 0x%x 0x%x
0x%x\n",h->hpet.vector[0],h->hpet.vector[1],h->hpet.vector[2]);
+}
+static void hpet_stats_dump(unsigned char c)
+{
+ struct domain *d;
+
+ for_each_domain(d) {
+ if(d->domain_id) {
+ hpet_stats_dump_dom(d);
+ hpet_state_dump(d);
+ }
+ }
+}
+#include <xen/keyhandler.h>
+static __init int hpet_stats_dump_keyhandler_init(void)
+{
+ register_keyhandler('Z', hpet_stats_dump,"hpet_stats_dump");
+ return 0;
+}
+__initcall(hpet_stats_dump_keyhandler_init);
+static void hpet_stats(struct vcpu *v)
+{
+ struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+ unsigned long mc, s, u;
+
+ mc = hpet_read_maincounter(h);
+ s = hpet_tick_to_ns(h, mc);
+ s = s / 1000000000UL;
+ if(h->hpet.intr_counts_last_s && (s > h->hpet.intr_counts_last_s)) {
+ for(u = (h->hpet.intr_counts_last_s + 1); u <= s; u++)
+ h->hpet.intr_counts[u % INTR_CNT_BUCKETS] = 0;
+ }
+ h->hpet.intr_counts_last_s = s;
+ h->hpet.intr_counts[s % INTR_CNT_BUCKETS]++;
+}
+
+
+
+static void hpet_vioapic_del_cb(uint64_t arg, uint32_t intrs_delivered)
+{
+ HPETState *h = (HPETState *)arg;
+
+ h->hpet.pending_mask = intrs_delivered;
+}
+void hpet_intr_en_fn_missed(struct vcpu *v, unsigned int vector, unsigned int post)
+{
+ struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+
+ spin_lock(&h->lock);
+ if(post)
+ hpet_stats(v);
+ else {
+ clear_bit(v->vcpu_id, &h->hpet.pending_mask);
+ if(!(h->hpet.pending_mask))
+ h->hpet.end_of_intr_mc = hpet_read_maincounter(h);
+ }
+ spin_unlock(&h->lock);
+}
+void hpet_intr_en_fn_no_missed(struct vcpu *v, unsigned int vector, unsigned int post)
+{
+ struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+
+ spin_lock(&h->lock);
+ if(post) {
+ hpet_stats(v);
+ if(!h->hpet.intr_pending_nr) {
+ // probably should kill domain here
+ printk("hpet_intr_en_fn: unexpected cleared intr_pending_nr
pending_mask 0x%lx\n", h->hpet.pending_mask);
+ spin_unlock(&h->lock);
+ return;
+ }
+ clear_bit(v->vcpu_id, &h->hpet.pending_mask);
+
+ if(!(h->hpet.pending_mask)) {
+ h->hpet.intr_pending_nr--;
+ if(h->hpet.intr_pending_nr) {
+ hvm_isa_irq_deassert(v->domain, 0);
+ hvm_isa_irq_assert_cb(v->domain, 0, hpet_vioapic_del_cb, (uint64_t)h);
+ }
+ }
+ }
+ spin_unlock(&h->lock);
+}
+
+/* For the guest-computes-missed-ticks policy, we will only route the
+ * interrupt if a) the last interrupt routed has been processed by the guest
+ * and b) it has been more than a (clock) period's worth of main counter
+ * ticks since that interrupt was processed.
+ *
+ * It was found, through experimentation, that Linux guests keep very
+ * accurate time for hpet with this logic, even if it means we are only
+ * delivering every 2*period. This is because the Linux logic for missed
+ * ticks is very good for hpet. On the other hand, delivering the interrupt
+ * just slightly early causes poor timekeeping.
+ *
+ * It was also found that time stamping at the end of interrupt processing
+ * improved accuracy over time stamping at injection time. This is probably
+ * due to the delay that can happen in the Linux interrupt handler if it has
+ * to wait for a lock.
+ */
+
+static void hpet_route_decision_missed_ticks(HPETState *h, unsigned int tn, int isa_irq, unsigned int *route, unsigned int *cb_expected)
+{
+ uint64_t mc;
+ struct domain *d = h->vcpu->domain;
+ unsigned int vector;
+
+ *route = 0;
+ *cb_expected = 0;
+ if(!vioapic_get_vector(d, isa_irq, &vector)) {
+ mc = hpet_read_maincounter(h);
+ if(h->hpet.vector[tn] == ~0U) {
+ h->hpet.vector[tn] = vector;
+ if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_MISSED))
+ panic(__FUNCTION__);
+ h->hpet.last_end_of_intr_mc = h->hpet.end_of_intr_mc = 0;
+ *cb_expected = 1;
+ *route = 1;
+ }
+ else if((h->hpet.end_of_intr_mc != h->hpet.last_end_of_intr_mc) &&
+ ((mc - h->hpet.end_of_intr_mc) > h->hpet.period[tn])) {
+ if(vector != h->hpet.vector[tn]) {
+ hvm_unregister_intr_en_notif(d, h->hpet.vector[tn]);
+ h->hpet.vector[tn] = vector;
+ if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_MISSED))
+ panic(__FUNCTION__);
+ }
+ h->hpet.last_end_of_intr_mc = h->hpet.end_of_intr_mc;
+ *cb_expected = 1;
+ *route = 1;
+ }
+ }
+ else
+ *route = 1;
+}
+static void hpet_route_decision_not_missed_ticks(HPETState *h, unsigned int tn, int isa_irq, unsigned int *route, unsigned int *cb_expected)
+{
+ struct domain *d = h->vcpu->domain;
+ unsigned int vector;
+
+ *route = 0;
+ *cb_expected = 0;
+ if(!vioapic_get_vector(d, isa_irq, &vector)) {
+ if(h->hpet.vector[tn] == ~0U) {
+ h->hpet.vector[tn] = vector;
+ if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_NO_MISSED))
+ panic(__FUNCTION__);
+ }
+ if(h->hpet.intr_pending_nr++)
+ return;
+
+ if(vector != h->hpet.vector[tn]) {
+ hvm_unregister_intr_en_notif(d, h->hpet.vector[tn]);
+ h->hpet.vector[tn] = vector;
+ if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_NO_MISSED))
+ panic(__FUNCTION__);
+ }
+ *cb_expected = 1;
+ *route = 1;
+ }
+ else
+ *route = 1;
+}
+
+typedef void (*hpet_route_fn_t)(HPETState *h, unsigned int tn, int isa_irq, unsigned int *route, unsigned int *cb_expected);
+
+static hpet_route_fn_t hpet_determine_route_params[HPET_DEL_POLICY_NUMS] =
+ {hpet_route_decision_missed_ticks, hpet_route_decision_not_missed_ticks};
+
static void hpet_route_interrupt(HPETState *h, unsigned int tn)
{
unsigned int tn_int_route = timer_int_route(h, tn);
struct domain *d = h->vcpu->domain;
+ unsigned int route = 1;
+ unsigned int cb_expected = 0;
ASSERT(spin_is_locked(&h->lock));
@@ -386,8 +591,18 @@
timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
int isa_irq = (tn == 0) ? 0 : 8;
- hvm_isa_irq_deassert(d, isa_irq);
- hvm_isa_irq_assert(d, isa_irq);
+
+ if(!tn)
+ (*hpet_determine_route_params[h->hpet.delivery_policy])(h, tn, isa_irq, &route, &cb_expected);
+
+ if(route) {
+ hvm_isa_irq_deassert(d, isa_irq);
+ if(cb_expected) {
+ hvm_isa_irq_assert_cb(d, isa_irq, hpet_vioapic_del_cb, (uint64_t)h);
+ }
+ else
+ hvm_isa_irq_assert(d, isa_irq);
+ }
return;
}
@@ -405,6 +620,46 @@
spin_unlock(&d->arch.hvm_domain.irq_lock);
}
+
+
+static void hpet_timer0_timeout_missed_ticks(HPETState *h)
+{
+ uint64_t mc = hpet_read_maincounter(h);
+ unsigned int tn = 0;
+ uint64_t period = h->hpet.period[tn];
+
+ if ( timer_is_32bit(h, tn) )
+ {
+ while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
+ h->hpet.timers[tn].cmp = (uint32_t)(h->hpet.timers[tn].cmp + period);
+ }
+ else
+ {
+ while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
+ h->hpet.timers[tn].cmp += period;
+ }
+ set_timer(&h->timers[tn],
+ NOW() + hpet_tick_to_ns(h, period));
+}
+static void hpet_timer0_timeout_not_missed_ticks(HPETState *h)
+{
+ unsigned int tn = 0;
+ uint64_t diff;
+ uint64_t period = h->hpet.period[tn];
+
+ if ( timer_is_32bit(h, tn) )
+ h->hpet.timers[tn].cmp = (uint32_t)(h->hpet.timers[tn].cmp + period);
+ else
+ h->hpet.timers[tn].cmp += period;
+
+ diff = hpet_compute_diff(h, tn);
+ set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
+}
+
+typedef void (*hpet_timer0_timeout_fn_t)(HPETState *h);
+
+static hpet_timer0_timeout_fn_t hpet_timer0_timeout[HPET_DEL_POLICY_NUMS] =
+ {hpet_timer0_timeout_missed_ticks, hpet_timer0_timeout_not_missed_ticks};
static void hpet_timer_fn(void *opaque)
{
struct HPET_timer_fn_info *htfi = opaque;
@@ -424,19 +679,25 @@
if ( timer_is_periodic(h, tn) && (h->hpet.period[tn] != 0) )
{
- uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
- if ( timer_is_32bit(h, tn) )
- {
- while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
- h->hpet.timers[tn].cmp = (uint32_t)(
- h->hpet.timers[tn].cmp + period);
- }
- else
- {
- while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
- h->hpet.timers[tn].cmp += period;
- }
- set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
+ if(!tn)
+ (*hpet_timer0_timeout[h->hpet.delivery_policy])(h);
+
+ else
+ {
+ uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
+ if ( timer_is_32bit(h, tn) )
+ {
+ while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
+ h->hpet.timers[tn].cmp = (uint32_t)(
+ h->hpet.timers[tn].cmp + period);
+ }
+ else
+ {
+ while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
+ h->hpet.timers[tn].cmp += period;
+ }
+ set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
+ }
}
spin_unlock(&h->lock);
@@ -462,7 +723,10 @@
spin_lock(&hp->lock);
/* Write the proper value into the main counter */
- hp->hpet.mc64 = hp->mc_offset + guest_time_hpet(hp->vcpu);
+
+ hp->hpet.mc64 = hpet_read_maincounter(hp);
+ rdtscll(hp->hpet.migr_local_tsc);
+ hp->hpet.cpu_khz = cpu_khz;
/* Save the HPET registers */
rc = _hvm_init_entry(h, HVM_SAVE_CODE(HPET), 0, HVM_SAVE_LENGTH(HPET));
@@ -488,19 +752,70 @@
C(period[0]);
C(period[1]);
C(period[2]);
+ C(vector[0]);
+ C(vector[1]);
+ C(vector[2]);
+ C(last_end_of_intr_mc);
+ C(end_of_intr_mc);
+ C(intr_pending_nr);
+ C(pending_mask);
+ C(delivery_policy);
+ C(phys_period);
+ C(cpu_khz);
+ C(migr_local_tsc);
+ C(intr_counts_last_s);
#undef C
+ memcpy(rec->intr_counts, hp->hpet.intr_counts, sizeof(hp->hpet.intr_counts));
}
spin_unlock(&hp->lock);
return rc;
}
+static int hpet_debug_migr_check_period(struct domain *d, HPETState *hp)
+{
+ unsigned long period, m_period, delta;
+
+ period = read_hpet_period();
+ m_period = (hp->hpet.capability >> 32) & 0xffffffffUL;
+ delta = (period > m_period) ? (period - m_period) : (m_period - period);
+ if(delta) {
+ /* Some hpets report small differences in period. A difference of 1 has been seen.
+ * Allow 100 as that is still 0.00014%, which is small enough.
+ */
+ printk("hpet.capability 0x%lx ((hp->hpet.capability >> 32) &
0xffffffffUL) 0x%lx period %lx\n",
+ hp->hpet.capability,
+ ((hp->hpet.capability >> 32) & 0xffffffffUL),
+ period);
+ if(delta > 100) {
+ printk("hpet period difference %ld too large\n", delta);
+ return 1;
+ }
+ }
+ return 0;
+}
+#define HPET_MIGR_TICK_ADJUSTMENT 1
+/*
+ * HPET_MIGR_TICK_ADJUSTMENT -
+ * This corrects for some of the time between hpet save on the sending node
+ * and hpet load on the receiving node. The correction has been found to be
+ * quite small, 300-400 usec. This adjustment is based on sending a final
+ * message in migrate with the tsc at send time (last_tsc_sender) in the
+ * message. Upon reception the tsc (first_tsc_receiver) is recorded. So the
+ * only time we are not taking into account is the time the message is in
+ * transit.
+ * The reason this adjustment is in here, given how small it is, is that
+ * there may be circumstances, for example a node heavily loaded with other
+ * guests, where the adjustment would be significant.
+ */
static int hpet_load(struct domain *d, hvm_domain_context_t *h)
{
HPETState *hp = &d->arch.hvm_domain.pl_time.vhpet;
struct hvm_hw_hpet *rec;
int i;
+#ifdef HPET_MIGR_TICK_ADJUSTMENT
+ unsigned long now, dt1, dt2, dt1ticks, dt2ticks, period;
+#endif
+
spin_lock(&hp->lock);
@@ -531,11 +846,50 @@
C(period[0]);
C(period[1]);
C(period[2]);
+ C(vector[0]);
+ C(vector[1]);
+ C(vector[2]);
+ C(last_end_of_intr_mc);
+ C(end_of_intr_mc);
+ C(intr_pending_nr);
+ C(pending_mask);
+ C(delivery_policy);
+ C(phys_period);
+ C(cpu_khz);
+ C(migr_local_tsc);
+ C(intr_counts_last_s);
#undef C
+
+ memcpy(hp->hpet.intr_counts, rec->intr_counts, sizeof(hp->hpet.intr_counts));
- /* Recalculate the offset between the main counter and guest time */
- hp->mc_offset = hp->hpet.mc64 - guest_time_hpet(hp->vcpu);
-
+ /* Recalculate the offset between the main counter and guest time */
+
+ if(hpet_debug_migr_check_period(d, hp))
+ return -EINVAL;
+
+#ifdef HPET_MIGR_TICK_ADJUSTMENT
+ period = read_hpet_period();
+ rdtscll(now);
+ /* dt1 is the time delta on the sending node between the sending of the last migrate message and the call to hpet_save. */
+ dt1 = ((d->last_tsc_sender - hp->hpet.migr_local_tsc) * 1000UL) / hp->hpet.cpu_khz;
+ dt1 = dt1 * 1000UL;
+ dt1ticks = hpet_phys_ns_to_ticks(dt1, period);
+
+ /* dt2 is the time delta on the receiving node between now (hpet_load) and the reception of the last migrate message. */
+ dt2 = ((now - d->first_tsc_receiver) * 1000UL) / cpu_khz;
+ dt2 = dt2 * 1000UL;
+ dt2ticks = hpet_phys_ns_to_ticks(dt2, period);
+ hp->mc_offset = hp->hpet.mc64 + dt1ticks + dt2ticks - read_64_main_counter();
+#else
+ hp->mc_offset = hp->hpet.mc64 - read_64_main_counter();
+#endif
+
+ if(hp->hpet.delivery_policy == HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS)
+ hvm_register_intr_en_notif(d, hp->hpet.vector[0], HVM_INTR_EN_NOTIF_HPET_MISSED);
+ else if(hp->hpet.delivery_policy == HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS)
+ hvm_register_intr_en_notif(d, hp->hpet.vector[0], HVM_INTR_EN_NOTIF_HPET_NO_MISSED);
+
+
/* Restart the timers */
for ( i = 0; i < HPET_TIMER_NUM; i++ )
if ( hpet_enabled(hp) )
@@ -548,6 +902,17 @@
HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, hpet_load, 1, HVMSR_PER_DOM);
+void hpet_notify_timer_mode(struct domain *d, uint64_t value)
+{
+ HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+
+ if(value == HVM_HPET_guest_computes_missed_ticks)
+ h->hpet.delivery_policy = HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS;
+ else if(value == HVM_HPET_guest_does_not_compute_missed_ticks)
+ h->hpet.delivery_policy = HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS;
+}
+
+
void hpet_init(struct vcpu *v)
{
HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
@@ -557,18 +922,22 @@
spin_lock_init(&h->lock);
+ if(hpet_physical_inited)
+ printk("virtual hpet_init: using physical hpet\n");
+ else
+ printk("virtual hpet_init: using simulated hpet\n");
+
h->vcpu = v;
h->stime_freq = S_TO_NS;
-
- h->hpet_to_ns_scale = ((S_TO_NS * STIME_PER_HPET_TICK) << 10) / h->stime_freq;
- h->hpet_to_ns_limit = ~0ULL / h->hpet_to_ns_scale;
+ h->hpet.phys_period = read_hpet_period();
/* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */
h->hpet.capability = 0x8086A201ULL;
/* This is the number of femptoseconds per HPET tick. */
/* Here we define HPET's frequency to be 1/16 of Xen system time */
- h->hpet.capability |= ((S_TO_FS*STIME_PER_HPET_TICK/h->stime_freq) << 32);
+
+ h->hpet.capability |= read_hpet_period() << 32;
for ( i = 0; i < HPET_TIMER_NUM; i++ )
{
@@ -577,6 +946,7 @@
h->hpet.timers[i].cmp = ~0ULL;
h->timer_fn_info[i].hs = h;
h->timer_fn_info[i].tn = i;
+ h->hpet.vector[i] = ~0U;
init_timer(&h->timers[i], hpet_timer_fn, &h->timer_fn_info[i],
v->processor);
}
@@ -590,7 +960,6 @@
for ( i = 0; i < HPET_TIMER_NUM; i++ )
kill_timer(&h->timers[i]);
}
-
void hpet_reset(struct domain *d)
{
hpet_deinit(d);
diff -r ec3493b63170 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -458,6 +458,8 @@
ctxt.dr6 = vc->debugreg[6];
ctxt.dr7 = vc->debugreg[7];
+ ctxt.int_notif = v->int_notif;
+
if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
return 1;
}
@@ -612,6 +614,9 @@
vc->flags = VGCF_online;
v->fpu_initialised = 1;
+
+ v->int_notif = ctxt.int_notif;
+
/* Auxiliary processors should be woken immediately. */
if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
@@ -2382,8 +2387,9 @@
hvm_latch_shinfo_size(d);
break;
case HVM_PARAM_TIMER_MODE:
- if ( a.value > HVMPTM_one_missed_tick_pending )
+ if ( a.value > HVM_HPET_guest_does_not_compute_missed_ticks )
rc = -EINVAL;
+ hpet_notify_timer_mode(d, a.value);
break;
case HVM_PARAM_MIG_LAST_TSC:
d->last_tsc_sender = a.value;
@@ -2574,7 +2580,98 @@
return rc;
}
+/* Interrupt inject and completion notification facility.
+ * Register 'notif_fn', to be called whenever 'vector' is injected (post = 1)
+ * or completed (post = 0). Here "completed" means the guest re-enabling
+ * interrupts.
+ */
+
+
+notif_fn_t hvm_intr_en_notif_fn[HVM_INTR_EN_NOTIF_MAX+1] =
+ {(notif_fn_t)0, hpet_intr_en_fn_missed, hpet_intr_en_fn_no_missed};
+int hvm_register_intr_en_notif(struct domain *d, unsigned int notif_vector, int notif_fn_index)
+{
+ intr_en_notif_t *entry;
+ struct list_head *cur;
+ intr_en_notif_t *cur_entry;
+
+ entry = xmalloc(struct intr_en_notif);
+ if(!entry) {
+ printk("hvm_register_intr_en_notif: xmalloc failed\n");
+ return 1;
+ }
+ entry->notif_vector = notif_vector;
+ entry->notif_fn = notif_fn_index;
+ spin_lock(&d->intr_en_notif_lock);
+ list_for_each(cur, &d->intr_en_notif_list) {
+ cur_entry = list_entry(cur, struct intr_en_notif, links);
+ if(cur_entry->notif_vector == notif_vector) {
+ list_del(cur);
+ xfree(cur_entry);
+ break;
+ }
+ }
+ list_add_tail(&entry->links, &d->intr_en_notif_list);
+ set_bit(notif_vector, &d->intr_en_notif_bitmap);
+ spin_unlock(&d->intr_en_notif_lock);
+ return 0;
+}
+int hvm_unregister_intr_en_notif(struct domain *d, unsigned int notif_vector)
+{
+ struct list_head *cur;
+ intr_en_notif_t *cur_entry;
+
+ spin_lock(&d->intr_en_notif_lock);
+ clear_bit(notif_vector, &d->intr_en_notif_bitmap);
+ list_for_each(cur, &d->intr_en_notif_list) {
+ cur_entry = list_entry(cur, struct intr_en_notif, links);
+ if(cur_entry->notif_vector == notif_vector) {
+ list_del(cur);
+ xfree(cur_entry);
+ break;
+ }
+ }
+ spin_unlock(&d->intr_en_notif_lock);
+ return 0;
+}
+void hvm_intr_en_notif_arm(struct vcpu *v, unsigned int vector)
+{
+ struct list_head *cur;
+ intr_en_notif_t *cur_entry;
+ struct domain *d = v->domain;
+
+ if(test_bit(vector, &d->intr_en_notif_bitmap)) {
+ vcpu_intr_en_notif_t *vi = &v->int_notif;
+
+ spin_lock(&d->intr_en_notif_lock);
+ list_for_each(cur, &d->intr_en_notif_list) {
+ cur_entry = list_entry(cur, struct intr_en_notif, links);
+ if(cur_entry->notif_vector == vector) {
+ vi->intr_en_notif_fn = cur_entry->notif_fn;
+ vi->intr_en_notif_vec = vector;
+ vi->intr_en_notif_state = 0;
+ hvm_intr_en_notif_fn[vi->intr_en_notif_fn](v, vi->intr_en_notif_vec, 1);
+ break;
+ }
+ }
+ spin_unlock(&d->intr_en_notif_lock);
+ }
+ return;
+}
+
+void hvm_intr_en_notif_disarm(struct vcpu *v, int irq_masked)
+{
+ if(v->int_notif.intr_en_notif_fn) {
+ vcpu_intr_en_notif_t *vi = &v->int_notif;
+
+ if(vi->intr_en_notif_state && !(irq_masked || vlapic_tpr_gte_vec(v, vi->intr_en_notif_vec))) {
+ hvm_intr_en_notif_fn[vi->intr_en_notif_fn](v, vi->intr_en_notif_vec, 0);
+ vi->intr_en_notif_fn = 0;
+ vi->intr_en_notif_state = 0;
+ }
+ else if (irq_masked || vlapic_tpr_gte_vec(v, vi->intr_en_notif_vec))
+ vi->intr_en_notif_state = 1;
+ }
+}
/*
* Local variables:
* mode: C
diff -r ec3493b63170 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -88,11 +88,35 @@
spin_unlock(&d->arch.hvm_domain.irq_lock);
}
+void hvm_isa_irq_assert_cb(
+ struct domain *d, unsigned int isa_irq,
+ void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+ uint64_t cb_arg)
+{
+ struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+ unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
+
+ ASSERT(isa_irq <= 15);
+
+ spin_lock(&d->arch.hvm_domain.irq_lock);
+
+ if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) &&
+ (hvm_irq->gsi_assert_count[gsi]++ == 0) )
+ {
+ vioapic_register_delivered_cb(d, intrs_delivered_cb, cb_arg);
+ vioapic_irq_positive_edge(d, gsi);
+ vpic_irq_positive_edge(d, isa_irq);
+ }
+
+ spin_unlock(&d->arch.hvm_domain.irq_lock);
+}
void hvm_isa_irq_assert(
struct domain *d, unsigned int isa_irq)
{
struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
ASSERT(isa_irq <= 15);
diff -r ec3493b63170 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c
+++ b/xen/arch/x86/hvm/svm/intr.c
@@ -148,7 +148,10 @@
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
struct hvm_intack intack;
+ hvm_intr_en_notif_disarm(v, irq_masked(vmcb->rflags));
+
/* Crank the handle on interrupt state. */
+
pt_update_irq(v);
svm_dirq_assist(v);
@@ -177,7 +180,6 @@
enable_intr_window(v, intack);
return;
}
-
intack = hvm_vcpu_ack_pending_irq(v, intack);
} while ( intack.source == hvm_intsrc_none );
@@ -189,6 +191,7 @@
{
HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
svm_inject_extint(v, intack.vector);
+ hvm_intr_en_notif_arm(v, intack.vector);
pt_intr_post(v, intack);
}
diff -r ec3493b63170 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -306,7 +306,18 @@
return pt_active(&pit->pt0);
}
-static void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq)
+void vioapic_register_delivered_cb(struct domain *d,
+ void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+ uint64_t cb_arg)
+{
+ struct hvm_hw_vioapic *vioapic = domain_vioapic(d);
+
+ ASSERT(spin_is_locked(&vioapic_domain(vioapic)->arch.hvm_domain.irq_lock));
+
+ vioapic->intrs_delivered_cb = intrs_delivered_cb;
+ vioapic->cb_arg = cb_arg;
+}
+
+void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq)
{
uint16_t dest = vioapic->redirtbl[irq].fields.dest_id;
uint8_t dest_mode = vioapic->redirtbl[irq].fields.dest_mode;
@@ -314,6 +325,7 @@
uint8_t vector = vioapic->redirtbl[irq].fields.vector;
uint8_t trig_mode = vioapic->redirtbl[irq].fields.trig_mode;
uint32_t deliver_bitmask;
+ uint32_t deliver_bitmask_final = (uint32_t)0;
struct vlapic *target;
struct vcpu *v;
@@ -348,6 +360,11 @@
vector, deliver_bitmask);
if ( target != NULL )
{
+ set_bit(vlapic_vcpu(target)->vcpu_id, &deliver_bitmask_final);
+ if(vioapic->intrs_delivered_cb) {
+ (*vioapic->intrs_delivered_cb)(vioapic->cb_arg, deliver_bitmask_final);
+ vioapic->intrs_delivered_cb = (void *)0;
+ }
ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
}
else
@@ -362,25 +379,23 @@
case dest_Fixed:
{
uint8_t bit;
+ if(vioapic->intrs_delivered_cb) {
+ (*vioapic->intrs_delivered_cb)(vioapic->cb_arg, deliver_bitmask);
+ vioapic->intrs_delivered_cb = 0;
+ }
for ( bit = 0; deliver_bitmask != 0; bit++ )
{
if ( !(deliver_bitmask & (1 << bit)) )
continue;
deliver_bitmask &= ~(1 << bit);
-#ifdef IRQ0_SPECIAL_ROUTING
- /* Do not deliver timer interrupts to VCPU != 0 */
- if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() )
- v = vioapic_domain(vioapic)->vcpu[0];
- else
-#endif
- v = vioapic_domain(vioapic)->vcpu[bit];
- if ( v != NULL )
- {
- target = vcpu_vlapic(v);
- ioapic_inj_irq(vioapic, target, vector,
- trig_mode, delivery_mode);
- }
- }
+ v = vioapic_domain(vioapic)->vcpu[bit];
+ /* ioapic_get_delivery_bitmask guarantees that v is never NULL. */
+ if( v != NULL ) {
+ target = vcpu_vlapic(v);
+ ioapic_inj_irq(vioapic, target, vector,
+ trig_mode, delivery_mode);
+ }
+ }
break;
}
@@ -404,6 +419,23 @@
delivery_mode);
break;
}
+}
+
+int vioapic_get_vector(struct domain *d, unsigned int isa_irq, unsigned int *vector)
+{
+ struct hvm_hw_vioapic *vioapic = domain_vioapic(d);
+ union vioapic_redir_entry *ent;
+ int ret = 1;
+ unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
+ spin_lock(&d->arch.hvm_domain.irq_lock);
+ ent = &vioapic->redirtbl[gsi];
+ if ( !ent->fields.mask ) {
+ *vector = ent->fields.vector;
+ ret = 0;
+ }
+ spin_unlock(&d->arch.hvm_domain.irq_lock);
+ return ret;
}
void vioapic_irq_positive_edge(struct domain *d, unsigned int irq)
diff -r ec3493b63170 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -113,7 +113,6 @@
/*
* IRR-specific bitmap update & search routines.
*/
-
static int vlapic_test_and_set_irr(int vector, struct vlapic *vlapic)
{
return vlapic_test_and_set_vector(vector, &vlapic->regs->data[APIC_IRR]);
@@ -165,6 +164,12 @@
vlapic, ppr, isr, isrv);
return ppr;
+}
+bool_t vlapic_tpr_gte_vec(struct vcpu *v, int vector)
+{
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ return ((vector & 0xf0) <= vlapic_get_ppr(vlapic));
}
int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda)
diff -r ec3493b63170 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -163,6 +163,10 @@
struct vcpu *v = current;
unsigned int tpr_threshold = 0;
enum hvm_intblk intblk;
+ unsigned long eflags;
+
+ eflags = __vmread(GUEST_RFLAGS);
+ hvm_intr_en_notif_disarm(v, irq_masked(eflags));
/* Crank the handle on interrupt state. */
pt_update_irq(v);
@@ -200,6 +204,7 @@
{
HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
vmx_inject_extint(v, intack.vector);
+ hvm_intr_en_notif_arm(v, intack.vector);
pt_intr_post(v, intack);
}
diff -r ec3493b63170 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -36,6 +36,7 @@
string_param("clocksource", opt_clocksource);
#define EPOCH MILLISECS(1000)
+#define HPET_PERIOD_SIMULATED 0x429b17fUL
unsigned long cpu_khz; /* CPU clock frequency in kHz. */
DEFINE_SPINLOCK(rtc_lock);
@@ -348,6 +349,7 @@
return hpet_read32(HPET_COUNTER);
}
+int hpet_physical_inited = 0;
static int init_hpet(struct platform_timesource *pts)
{
u64 hpet_rate = hpet_setup();
@@ -359,6 +361,8 @@
pts->frequency = hpet_rate;
pts->read_counter = read_hpet_count;
pts->counter_bits = 32;
+
+ hpet_physical_inited = 1;
return 1;
}
@@ -500,6 +504,64 @@
return stime;
}
+static int hpet_main_counter_phys_avoid_hdw = 0;
+boolean_param("hpet_avoid", hpet_main_counter_phys_avoid_hdw);
+static unsigned long hpet_main_counter_phys_avoid_hdw_period;
+#define hpet_phys_ns_to_ticks(ns, period) hpet_mult_div(ns, 1000000UL, period)
+#define hpet_tick_to_ns(tick, period) hpet_mult_div(tick, period, 1000000UL)
+#define TSC_TO_NSEC(tsc) hpet_mult_div(tsc, 1000000UL, cpu_khz)
+
+typedef struct {
+ spinlock_t lock;
+ s_time_t last_ret;
+} get_s_time_mono_t;
+
+static get_s_time_mono_t get_s_time_mon;
+
+static void get_s_time_mono_init(void)
+{
+ spin_lock_init(&get_s_time_mon.lock);
+}
+
+u64 read_64_main_counter(void)
+{
+ u64 count;
+ unsigned long flags;
+ struct cpu_time *t = &this_cpu(cpu_time);
+ u64 tsc, delta;
+ s_time_t now;
+
+ if(hpet_main_counter_phys_avoid_hdw || !hpet_physical_inited) {
+ spin_lock(&get_s_time_mon.lock);
+ rdtscll(tsc);
+ delta = tsc - t->local_tsc_stamp;
+ now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+ if(now > get_s_time_mon.last_ret)
+ get_s_time_mon.last_ret = now;
+ else
+ now = get_s_time_mon.last_ret;
+ spin_unlock(&get_s_time_mon.lock);
+ if(!hpet_main_counter_phys_avoid_hdw_period)
+ hpet_main_counter_phys_avoid_hdw_period = read_hpet_period();
+ count = hpet_phys_ns_to_ticks(now, hpet_main_counter_phys_avoid_hdw_period);
+ }
+ else {
+ spin_lock_irqsave(&platform_timer_lock, flags);
+ count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
+ spin_unlock_irqrestore(&platform_timer_lock, flags);
+ }
+ return count;
+}
+u64 read_hpet_period(void)
+{
+ unsigned long period;
+
+ if(hpet_physical_inited)
+ period = (unsigned long)hpet_read32(HPET_PERIOD);
+ else
+ period = HPET_PERIOD_SIMULATED;
+ return period;
+}
static void platform_time_calibration(void)
{
@@ -559,6 +621,7 @@
plt_overflow(NULL);
platform_timer_stamp = plt_stamp64;
+ get_s_time_mono_init();
printk("Platform timer is %s %s\n",
freq_string(pts->frequency), pts->name);
diff -r ec3493b63170 xen/common/domain.c
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -96,6 +96,8 @@
spin_lock_init(&d->hypercall_deadlock_mutex);
INIT_LIST_HEAD(&d->page_list);
INIT_LIST_HEAD(&d->xenpage_list);
+ spin_lock_init(&d->intr_en_notif_lock);
+ INIT_LIST_HEAD(&d->intr_en_notif_list);
return d;
}
diff -r ec3493b63170 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -23,6 +23,7 @@
#include <asm/current.h>
#include <asm/x86_emulate.h>
+#include <asm/processor.h>
#include <public/domctl.h>
#include <public/hvm/save.h>
@@ -218,10 +219,20 @@
hvm_funcs.set_segment_register(v, seg, reg);
}
+static inline int irq_masked(unsigned long eflags)
+{
+ return ((eflags & X86_EFLAGS_IF) == 0);
+}
+
void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
void hvm_migrate_timers(struct vcpu *v);
void hvm_do_resume(struct vcpu *v);
+
+int hvm_register_intr_en_notif(struct domain *d, unsigned int notif_vector, int notif_fn_index);
+int hvm_unregister_intr_en_notif(struct domain *d, unsigned int notif_vector);
+void hvm_intr_en_notif_arm(struct vcpu *v, unsigned int vector);
+void hvm_intr_en_notif_disarm(struct vcpu *v, int irq_masked);
static inline void
hvm_inject_exception(unsigned int trapnr, int errcode, unsigned long cr2)
diff -r ec3493b63170 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -160,6 +160,10 @@
struct domain *d, unsigned int isa_irq);
void hvm_isa_irq_deassert(
struct domain *d, unsigned int isa_irq);
+void hvm_isa_irq_assert_cb(
+ struct domain *d, unsigned int isa_irq,
+ void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+ uint64_t cb_arg);
void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
diff -r ec3493b63170 xen/include/asm-x86/hvm/vioapic.h
--- a/xen/include/asm-x86/hvm/vioapic.h
+++ b/xen/include/asm-x86/hvm/vioapic.h
@@ -66,5 +66,7 @@
void vioapic_reset(struct domain *d);
void vioapic_irq_positive_edge(struct domain *d, unsigned int irq);
void vioapic_update_EOI(struct domain *d, int vector);
-
+int vioapic_get_vector(struct domain *d, unsigned int isa_irq, unsigned int *vector);
+void vioapic_register_delivered_cb(struct domain *d,
+ void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+ uint64_t cb_arg);
#endif /* __ASM_X86_HVM_VIOAPIC_H__ */
diff -r ec3493b63170 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -98,4 +98,6 @@
int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda);
+bool_t vlapic_tpr_gte_vec(struct vcpu *v, int vector);
+
#endif /* __ASM_X86_HVM_VLAPIC_H__ */
diff -r ec3493b63170 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -50,23 +50,45 @@
uint64_t fsb; /* FSB route, not supported now */
} timers[HPET_TIMER_NUM];
- /* Hidden register state */
+ /* The rest of this struct is hidden register state */
+
+ /* Per timer state */
uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+ uint32_t vector[HPET_TIMER_NUM];
+
+ /* Timer 0 (clock) specific state */
+
+ uint64_t last_end_of_intr_mc;
+ uint64_t end_of_intr_mc;
+ uint64_t intr_pending_nr;
+ uint64_t pending_mask;
+ uint32_t delivery_policy;
+
+ /* Global state */
+
+ uint64_t phys_period;
+ uint64_t cpu_khz;
+ uint64_t migr_local_tsc;
+
+ /* Debug */
+
+ uint64_t intr_counts[INTR_CNT_BUCKETS];
+ uint64_t intr_counts_last_s;
};
typedef struct HPETState {
struct hpet_registers hpet;
struct vcpu *vcpu;
uint64_t stime_freq;
- uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
- uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns */
uint64_t mc_offset;
struct timer timers[HPET_TIMER_NUM];
struct HPET_timer_fn_info timer_fn_info[HPET_TIMER_NUM];
spinlock_t lock;
} HPETState;
-
+void hpet_intr_en_fn_missed(struct vcpu *v, unsigned int vector, unsigned int post);
+void hpet_intr_en_fn_no_missed(struct vcpu *v, unsigned int vector, unsigned int post);
+void hpet_notify_timer_mode(struct domain *d, uint64_t value);
/*
* Abstract layer of periodic time, one short time.
*/
diff -r ec3493b63170 xen/include/public/arch-x86/hvm/save.h
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -47,6 +47,12 @@
/*
* Processor
*/
+
+typedef struct vcpu_intr_en_notif {
+ int intr_en_notif_fn;
+ int intr_en_notif_state;
+ unsigned int intr_en_notif_vec;
+} vcpu_intr_en_notif_t;
struct hvm_hw_cpu {
uint8_t fpu_regs[512];
@@ -156,6 +162,7 @@
};
/* error code for pending event */
uint32_t error_code;
+ vcpu_intr_en_notif_t int_notif;
};
DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu);
@@ -253,6 +260,8 @@
#endif
} fields;
} redirtbl[VIOAPIC_NUM_PINS];
+ void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered);
+ uint64_t cb_arg;
};
DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
@@ -366,6 +375,11 @@
* HPET
*/
+#define HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS 0 /* Linux */
+#define HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS 1 /* Windows */
+#define HPET_DEL_POLICY_NUMS 2
+#define INTR_CNT_BUCKETS 20
+
#define HPET_TIMER_NUM 3 /* 3 timers supported now */
struct hvm_hw_hpet {
/* Memory-mapped, software visible registers */
@@ -385,8 +399,30 @@
} timers[HPET_TIMER_NUM];
uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */
- /* Hidden register state */
+ /* The rest of this struct is hidden register state */
+
+ /* Per timer state */
uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+ uint32_t vector[HPET_TIMER_NUM];
+
+ /* Timer 0 (clock) specific state */
+
+ uint64_t last_end_of_intr_mc;
+ uint64_t end_of_intr_mc;
+ uint64_t intr_pending_nr;
+ uint64_t pending_mask;
+ uint32_t delivery_policy;
+
+ /* Global state */
+
+ uint64_t phys_period;
+ uint64_t cpu_khz;
+ uint64_t migr_local_tsc;
+
+ /* Debug */
+
+ uint64_t intr_counts[INTR_CNT_BUCKETS];
+ uint64_t intr_counts_last_s;
};
DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet);
diff -r ec3493b63170 xen/include/public/hvm/params.h
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -80,6 +80,8 @@
#define HVMPTM_no_delay_for_missed_ticks 1
#define HVMPTM_no_missed_ticks_pending 2
#define HVMPTM_one_missed_tick_pending 3
+#define HVM_HPET_guest_computes_missed_ticks 4
+#define HVM_HPET_guest_does_not_compute_missed_ticks 5
/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
#define HVM_PARAM_HPET_ENABLED 11
diff -r ec3493b63170 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -69,6 +69,19 @@
int evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);
+typedef void (*notif_fn_t)(struct vcpu *v, unsigned int vector, unsigned int post);
+
+#define HVM_INTR_EN_NOTIF_UNUSED 0
+#define HVM_INTR_EN_NOTIF_HPET_MISSED 1
+#define HVM_INTR_EN_NOTIF_HPET_NO_MISSED 2
+#define HVM_INTR_EN_NOTIF_MAX 2
+
+typedef struct intr_en_notif {
+ struct list_head links;
+ unsigned int notif_vector;
+ int notif_fn;
+} intr_en_notif_t;
+
struct vcpu
{
int vcpu_id;
@@ -135,6 +148,7 @@
cpumask_t vcpu_dirty_cpumask;
struct arch_vcpu arch;
+ vcpu_intr_en_notif_t int_notif;
};
/* Per-domain lock can be recursively acquired in fault handlers. */
@@ -232,6 +246,10 @@
int32_t time_offset_seconds;
struct rcu_head rcu;
+
+ spinlock_t intr_en_notif_lock;
+ unsigned long intr_en_notif_bitmap[(MAX_VECTOR/sizeof(unsigned long))+1];
+ struct list_head intr_en_notif_list;
unsigned long last_tsc_sender;
unsigned long first_tsc_receiver;
@@ -508,6 +526,18 @@
if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
vcpu_wake(v);
}
+/*
+ * compute (var*num)/den where var*num may overflow 64 bits
+ */
+static inline uint64_t hpet_mult_div(uint64_t var, uint64_t num, uint64_t den)
+{
+ uint64_t result, q, r;
+
+ q = var / den;
+ r = var % den;
+ result = (q * num) + (r * num) / den;
+ return result;
+}
#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
diff -r ec3493b63170 xen/include/xen/time.h
--- a/xen/include/xen/time.h
+++ b/xen/include/xen/time.h
@@ -61,6 +61,11 @@
extern void send_timer_event(struct vcpu *v);
+u64 read_64_main_counter(void);
+u64 read_hpet_period(void);
+
+extern int hpet_physical_inited;
+
#endif /* __XEN_TIME_H__ */
/*
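
For reviewers, the HPET_MIGR_TICK_ADJUSTMENT arithmetic in hpet_load()
reduces to the standalone sketch below (not part of the patch;
tsc_delta_to_hpet_ticks and mult_div are illustrative local helpers):

#include <stdint.h>

static uint64_t mult_div(uint64_t var, uint64_t num, uint64_t den)
{
    return (var / den) * num + ((var % den) * num) / den;
}

/* tsc delta -> ns -> hpet ticks, mirroring hpet_load(). khz is the TSC
 * rate of the node where the delta was measured (hp->hpet.cpu_khz for
 * dt1 on the sender, cpu_khz for dt2 on the receiver). */
static uint64_t tsc_delta_to_hpet_ticks(uint64_t tsc_delta, uint64_t khz,
                                        uint64_t period_fs)
{
    uint64_t ns = ((tsc_delta * 1000UL) / khz) * 1000UL; /* usec, then ns */
    return mult_div(ns, 1000000UL, period_fs);           /* ns -> ticks */
}

/* The receiver then reconstructs the main counter offset as:
 *
 *   dt1ticks  = tsc_delta_to_hpet_ticks(last_tsc_sender - migr_local_tsc,
 *                                       sender_cpu_khz, period);
 *   dt2ticks  = tsc_delta_to_hpet_ticks(now - first_tsc_receiver,
 *                                       cpu_khz, period);
 *   mc_offset = mc64 + dt1ticks + dt2ticks - read_64_main_counter();
 *
 * leaving only the final message's transit time unaccounted for. */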
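
Also for reviewers: the wraparound handling in the new hpet_compute_diff()
relies on unsigned subtraction followed by a signed test. A small sketch of
the 32-bit case (compute_diff32 is an illustrative local helper):

#include <stdint.h>
#include <stdio.h>

/* (uint32_t)(cmp - mc) is wrap-safe, and the cast to int32_t classifies the
 * comparator as "in the future" (positive) or "already passed" (negative),
 * provided the two values are within 2^31 ticks of each other. */
static uint32_t compute_diff32(uint32_t cmp, uint32_t mc)
{
    uint32_t diff = cmp - mc;
    return (int32_t)diff > 0 ? diff : 0;
}

int main(void)
{
    /* Counter just before the 32-bit wrap, comparator just after it:
     * cmp (5) is still in the future even though cmp < mc numerically. */
    printf("%u\n", compute_diff32(5, 0xfffffff0u)); /* prints 21 */

    /* Comparator already passed: expire immediately. */
    printf("%u\n", compute_diff32(0xfffffff0u, 5)); /* prints 0 */
    return 0;
}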