WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 2/2] Improve hpet accuracy

To: xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxx>, Dave Winchell <dwinchell@xxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 2/2] Improve hpet accuracy
From: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>
Date: Thu, 05 Jun 2008 11:00:06 -0400
Delivery-date: Thu, 05 Jun 2008 08:01:20 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 2.0.0.14 (X11/20080501)
This patch performs the bulk of the changes described in the 0/2 patch description, to improve HPET accuracy.

Signed-off-by: Dave Winchell <dwinchell@xxxxxxxxxxxxxxx>
Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>
diff -r ec3493b63170 xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -24,14 +24,11 @@
 #include <xen/sched.h>
 #include <xen/event.h>
 
+
 #define HPET_BASE_ADDRESS   0xfed00000ULL
 #define HPET_MMAP_SIZE      1024
 #define S_TO_NS  1000000000ULL           /* 1s  = 10^9  ns */
 #define S_TO_FS  1000000000000000ULL     /* 1s  = 10^15 fs */
-
-/* Frequency_of_Xen_systeme_time / frequency_of_HPET = 16 */
-#define STIME_PER_HPET_TICK 16
-#define guest_time_hpet(v) (hvm_get_guest_time(v) / STIME_PER_HPET_TICK)
 
 #define HPET_ID         0x000
 #define HPET_PERIOD     0x004
@@ -72,8 +69,9 @@
                     << HPET_TN_INT_ROUTE_CAP_SHIFT)
 
 #define hpet_tick_to_ns(h, tick)                        \
-    ((s_time_t)((((tick) > (h)->hpet_to_ns_limit) ?     \
-        ~0ULL : (tick) * (h)->hpet_to_ns_scale) >> 10))
+    (s_time_t)hpet_mult_div(tick, h->hpet.phys_period, 1000000UL)
+
+#define hpet_phys_ns_to_ticks(ns, period) hpet_mult_div(ns, 1000000UL, period)
 
 #define timer_config(h, n)       (h->hpet.timers[n].config)
 #define timer_is_periodic(h, n)  (timer_config(h, n) & HPET_TN_PERIODIC)
@@ -139,15 +137,34 @@
 
     return 0;
 }
-
 static inline uint64_t hpet_read_maincounter(HPETState *h)
 {
-    ASSERT(spin_is_locked(&h->lock));
+    uint64_t mc;
 
-    if ( hpet_enabled(h) )
-        return guest_time_hpet(h->vcpu) + h->mc_offset;
-    else 
-        return h->hpet.mc64;
+    mc = read_64_main_counter() + h->mc_offset;
+    return mc;
+}
+static inline uint64_t hpet_compute_diff(HPETState *h, int tn)
+{
+
+    if ( timer_is_32bit(h, tn) ) {
+       uint32_t tn_cmp, diff, mc;
+
+       tn_cmp = (uint32_t)h->hpet.timers[tn].cmp;
+       mc = (uint32_t)hpet_read_maincounter(h);
+       diff = tn_cmp - mc;
+       diff = (int32_t)diff > 0 ? diff : (uint32_t)0;
+       return (uint64_t)diff;
+    }
+    else {
+       uint64_t tn_cmp, diff, mc;
+       
+       mc = hpet_read_maincounter(h);
+       tn_cmp = h->hpet.timers[tn].cmp;
+       diff = tn_cmp - mc;
+       diff = (int64_t)diff > 0 ? diff : (uint64_t)0;
+       return diff;
+    }
 }
 
 static int hpet_read(
@@ -190,13 +207,9 @@
     stop_timer(&h->timers[tn]);
 }
 
-/* the number of HPET tick that stands for
- * 1/(2^10) second, namely, 0.9765625 milliseconds */
-#define  HPET_TINY_TIME_SPAN  ((h->stime_freq >> 10) / STIME_PER_HPET_TICK)
-
 static void hpet_set_timer(HPETState *h, unsigned int tn)
 {
-    uint64_t tn_cmp, cur_tick, diff;
+    uint64_t diff;
 
     ASSERT(tn < HPET_TIMER_NUM);
     ASSERT(spin_is_locked(&h->lock));
@@ -209,25 +222,7 @@
         pit_stop_channel0_irq(pit);
     }
 
-    tn_cmp   = h->hpet.timers[tn].cmp;
-    cur_tick = hpet_read_maincounter(h);
-    if ( timer_is_32bit(h, tn) )
-    {
-        tn_cmp   = (uint32_t)tn_cmp;
-        cur_tick = (uint32_t)cur_tick;
-    }
-
-    diff = tn_cmp - cur_tick;
-
-    /*
-     * Detect time values set in the past. This is hard to do for 32-bit
-     * comparators as the timer does not have to be set that far in the future
-     * for the counter difference to wrap a 32-bit signed integer. We fudge
-     * by looking for a 'small' time value in the past.
-     */
-    if ( (int64_t)diff < 0 )
-        diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
-            ? (uint32_t)diff : 0;
+    diff = hpet_compute_diff(h, tn);
 
     set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
 }
@@ -273,14 +268,15 @@
         if ( !(old_val & HPET_CFG_ENABLE) && (new_val & HPET_CFG_ENABLE) )
         {
             /* Enable main counter and interrupt generation. */
-            h->mc_offset = h->hpet.mc64 - guest_time_hpet(h->vcpu);
+
+           h->mc_offset = h->hpet.mc64 - read_64_main_counter();
+           
             for ( i = 0; i < HPET_TIMER_NUM; i++ )
                 hpet_set_timer(h, i); 
         }
         else if ( (old_val & HPET_CFG_ENABLE) && !(new_val & HPET_CFG_ENABLE) )
         {
             /* Halt main counter and disable interrupt generation. */
-            h->hpet.mc64 = h->mc_offset + guest_time_hpet(h->vcpu);
             for ( i = 0; i < HPET_TIMER_NUM; i++ )
                 hpet_stop_timer(h, i);
         }
@@ -291,6 +287,9 @@
             gdprintk(XENLOG_WARNING, 
                      "HPET: writing main counter but it's not halted!\n");
         h->hpet.mc64 = new_val;
+
+       h->mc_offset = h->hpet.mc64 - read_64_main_counter();
+
         break;
 
     case HPET_T0_CFG:
@@ -333,7 +332,7 @@
              *  - maximum is to prevent overflow in time_after() calculations
              */
             if ( hpet_tick_to_ns(h, new_val) < MICROSECS(900) )
-                new_val = (MICROSECS(900) << 10) / h->hpet_to_ns_scale;
+               new_val = hpet_phys_ns_to_ticks(MICROSECS(900), 
h->hpet.phys_period);
             new_val &= (timer_is_32bit(h, tn) ? ~0u : ~0ull) >> 1;
             h->hpet.period[tn] = new_val;
         }
@@ -373,10 +372,216 @@
     .write_handler = hpet_write
 };
 
+static void hpet_stats_dump_dom(struct domain *d)
+{
+    struct HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+    unsigned long mc, s;
+    int i;
+
+    printk("domain %d\n", d->domain_id);
+    mc = hpet_read_maincounter(h);
+    s = hpet_tick_to_ns(h, mc);
+    s = s / 1000000000UL;
+
+    printk("cur index %ld\n", s % INTR_CNT_BUCKETS);
+    for(i = 0; i < INTR_CNT_BUCKETS; i++) {
+       if(!(i%10))
+           printk("\n");
+       printk("%ld ", h->hpet.intr_counts[i]);
+    }
+    printk("\n");
+}
+static void hpet_state_dump(struct domain *d)
+{
+    struct HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+
+    printk("timers.config: 0x%lx 0x%lx 0x%lx\n", h->hpet.timers[0].config, 
h->hpet.timers[1].config, h->hpet.timers[2].config);
+    printk("timers.cmp: 0x%lx 0x%lx 0x%lx\n", h->hpet.timers[0].cmp, 
h->hpet.timers[1].cmp, h->hpet.timers[2].cmp);
+    printk("current mc: 0x%lx\n", hpet_read_maincounter(h));
+    printk("period: %lx %lx %lx\n", h->hpet.period[0], h->hpet.period[1], 
h->hpet.period[1]);
+    printk("mc_offset 0x%lx\n",h->mc_offset);
+    printk("phys_period 0x%lx\n",h->hpet.phys_period);
+    printk("last_end_of_intr_mc 0x%lx\n",h->hpet.last_end_of_intr_mc);
+    printk("end_of_intr_mc 0x%lx\n",h->hpet.end_of_intr_mc);
+    printk("cpu_khz 0x%lx\n",h->hpet.cpu_khz);
+    printk("migr_local_tsc 0x%lx\n",h->hpet.migr_local_tsc);
+    printk("intr_pending_nr 0x%lx\n",h->hpet.intr_pending_nr);
+    printk("pending_mask 0x%lx\n",h->hpet.pending_mask);
+    printk("delivery_policy %d\n",h->hpet.delivery_policy);
+    printk("vector 0x%x 0x%x 
0x%x\n",h->hpet.vector[0],h->hpet.vector[1],h->hpet.vector[2]);
+}
+static void hpet_stats_dump(unsigned char c)
+{
+    struct domain *d;
+
+    for_each_domain(d) {
+       if(d->domain_id) {
+           hpet_stats_dump_dom(d);
+           hpet_state_dump(d);
+       }
+    }
+}
+#include <xen/keyhandler.h>
+static __init int hpet_stats_dump_keyhandler_init(void)
+{
+    register_keyhandler('Z', hpet_stats_dump,"hpet_stats_dump");
+    return 0;
+}
+__initcall(hpet_stats_dump_keyhandler_init);
+static void hpet_stats(struct vcpu *v)
+{
+    struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+    unsigned long mc, s, u;
+
+    mc = hpet_read_maincounter(h);
+    s = hpet_tick_to_ns(h, mc);
+    s = s / 1000000000UL;
+    if(h->hpet.intr_counts_last_s && (s > h->hpet.intr_counts_last_s)) {
+       for(u = (h->hpet.intr_counts_last_s + 1); u <= s; u++)
+           h->hpet.intr_counts[u % INTR_CNT_BUCKETS] = 0;
+    }
+    h->hpet.intr_counts_last_s = s;
+    h->hpet.intr_counts[s % INTR_CNT_BUCKETS]++;
+}
+
+
+
+static void hpet_vioapic_del_cb(uint64_t arg, uint32_t intrs_delivered)
+{
+    HPETState *h = (HPETState *)arg;
+
+    h->hpet.pending_mask = intrs_delivered;
+}
+void hpet_intr_en_fn_missed(struct vcpu *v, unsigned int vector, unsigned int 
post)
+{
+    struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+
+    spin_lock(&h->lock);
+    if(post)
+       hpet_stats(v);
+    else {
+       clear_bit(v->vcpu_id, &h->hpet.pending_mask);
+       if(!(h->hpet.pending_mask))
+           h->hpet.end_of_intr_mc = hpet_read_maincounter(h);
+    }
+    spin_unlock(&h->lock);
+}
+void hpet_intr_en_fn_no_missed(struct vcpu *v, unsigned int vector, unsigned 
int post)
+{
+    struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+
+    spin_lock(&h->lock);
+    if(post) {
+       hpet_stats(v);
+       if(!h->hpet.intr_pending_nr) {
+           // probably should kill domain here
+           printk("hpet_intr_en_fn: unexpected cleared intr_pending_nr 
pending_mask 0x%lx\n", h->hpet.pending_mask);
+           spin_unlock(&h->lock);
+           return;
+       }
+       clear_bit(v->vcpu_id, &h->hpet.pending_mask);
+
+       if(!(h->hpet.pending_mask)) {
+           h->hpet.intr_pending_nr--;
+           if(h->hpet.intr_pending_nr) {
+               hvm_isa_irq_deassert(v->domain, 0);
+               hvm_isa_irq_assert_cb(v->domain, 0, hpet_vioapic_del_cb, 
(uint64_t)h);      
+           }
+       }
+    }
+    spin_unlock(&h->lock);
+}
+
+/* For guest computes missed policy,
+ * we will only route the interrupt if a) the last interrupt routed has been 
processed by the guest
+ * and b) it's been more than a (clock) period's worth of main counter ticks 
since that interrupt
+ * was processed.
+ *
+ * It was found, through experimentation, that Linux guests keep very accurate 
time for hpet with
+ * this logic, even if it means we are only delivering every 2*period. This is 
because the Linux logic
+ * for missed ticks is very good for hpet. On the other hand, delivering the 
interrupt just slightly
+ * early causes poor timekeeping.
+ *
+ * It was also found that time stamping at the end of interrupt processing 
improved accuracy over
+ * time stamping at injection time. This is probably due to the delay that can 
happen in the Linux
+ * interrupt handler if it has to wait for a lock.
+ *
+ */
+
+static void hpet_route_decision_missed_ticks(HPETState *h, unsigned int tn, 
int isa_irq, unsigned int *route, unsigned int *cb_expected)
+{
+    uint64_t mc;
+    struct domain *d = h->vcpu->domain;
+    unsigned int vector;
+
+    *route = 0;
+    *cb_expected = 0;
+    if(!vioapic_get_vector(d, isa_irq, &vector)) {
+       mc = hpet_read_maincounter(h);
+       if(h->hpet.vector[tn] == ~0U) {
+           h->hpet.vector[tn] = vector;
+           if(hvm_register_intr_en_notif(d, vector, 
HVM_INTR_EN_NOTIF_HPET_MISSED))
+               panic(__FUNCTION__);
+           h->hpet.last_end_of_intr_mc = h->hpet.end_of_intr_mc = 0;
+           *cb_expected = 1;
+           *route = 1;
+       }
+       else if((h->hpet.end_of_intr_mc != h->hpet.last_end_of_intr_mc) &&
+               ((mc - h->hpet.end_of_intr_mc) >  h->hpet.period[tn])) {
+           if(vector != h->hpet.vector[tn]) {
+               hvm_unregister_intr_en_notif(d, h->hpet.vector[tn]);
+               h->hpet.vector[tn] = vector;
+               if(hvm_register_intr_en_notif(d, vector, 
HVM_INTR_EN_NOTIF_HPET_MISSED))
+                   panic(__FUNCTION__);                        
+           }
+           h->hpet.last_end_of_intr_mc = h->hpet.end_of_intr_mc;
+           *cb_expected = 1;
+           *route = 1;
+       }
+    }
+    else
+       *route = 1;
+}
+static void hpet_route_decision_not_missed_ticks(HPETState *h, unsigned int 
tn, int isa_irq, unsigned int *route, unsigned int *cb_expected)
+{
+    struct domain *d = h->vcpu->domain;
+    unsigned int vector;
+
+    *route = 0;
+    *cb_expected = 0;
+    if(!vioapic_get_vector(d, isa_irq, &vector)) {
+       if(h->hpet.vector[tn] == ~0U) {
+           h->hpet.vector[tn] = vector;
+           if(hvm_register_intr_en_notif(d, vector, 
HVM_INTR_EN_NOTIF_HPET_NO_MISSED))
+               panic(__FUNCTION__);
+       }
+       if(h->hpet.intr_pending_nr++)
+           return;
+
+       if(vector != h->hpet.vector[tn]) {
+           hvm_unregister_intr_en_notif(d, h->hpet.vector[tn]);
+           h->hpet.vector[tn] = vector;
+           if(hvm_register_intr_en_notif(d, vector, 
HVM_INTR_EN_NOTIF_HPET_NO_MISSED))
+               panic(__FUNCTION__);                    
+       }
+       *cb_expected = 1;
+       *route = 1;
+    }
+    else
+       *route = 1;
+}
+
+typedef void (*hpet_route_fn_t)(HPETState *h, unsigned int tn, int isa_irq, 
unsigned int *route, unsigned int *cb_expected);
+
+static hpet_route_fn_t hpet_determine_route_params[HPET_DEL_POLICY_NUMS] = 
{hpet_route_decision_missed_ticks,
+                                                                    
hpet_route_decision_not_missed_ticks};
+
 static void hpet_route_interrupt(HPETState *h, unsigned int tn)
 {
     unsigned int tn_int_route = timer_int_route(h, tn);
     struct domain *d = h->vcpu->domain;
+    unsigned int route = 1;
+    unsigned int cb_expected = 0;
 
     ASSERT(spin_is_locked(&h->lock));
 
@@ -386,8 +591,18 @@
            timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
            timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
         int isa_irq = (tn == 0) ? 0 : 8;
-        hvm_isa_irq_deassert(d, isa_irq);
-        hvm_isa_irq_assert(d, isa_irq);
+
+       if(!tn)
+           (*hpet_determine_route_params[h->hpet.delivery_policy])(h, tn, 
isa_irq, &route, &cb_expected);
+
+       if(route) {
+           hvm_isa_irq_deassert(d, isa_irq);
+           if(cb_expected) {
+               hvm_isa_irq_assert_cb(d, isa_irq, hpet_vioapic_del_cb, 
(uint64_t)h);
+           }
+           else
+               hvm_isa_irq_assert(d, isa_irq);
+       }
         return;
     }
 
@@ -405,6 +620,46 @@
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
+
+
+static void hpet_timer0_timeout_missed_ticks(HPETState *h)
+{
+    uint64_t mc = hpet_read_maincounter(h);
+    unsigned int tn = 0;
+    uint64_t period = h->hpet.period[tn];
+
+    if ( timer_is_32bit(h, tn) )
+    {
+       while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
+           h->hpet.timers[tn].cmp = (uint32_t)(h->hpet.timers[tn].cmp + 
period);
+    }
+    else
+    {
+       while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
+           h->hpet.timers[tn].cmp += period;
+    }
+    set_timer(&h->timers[tn], 
+             NOW() + hpet_tick_to_ns(h, period));
+}
+static void hpet_timer0_timeout_not_missed_ticks(HPETState *h)
+{
+    unsigned int tn = 0;
+    uint64_t diff;
+    uint64_t period = h->hpet.period[tn];
+
+    if ( timer_is_32bit(h, tn) )
+       h->hpet.timers[tn].cmp = (uint32_t)(h->hpet.timers[tn].cmp + period);
+    else
+       h->hpet.timers[tn].cmp += period;   
+
+    diff = hpet_compute_diff(h, tn);
+    set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
+}
+
+typedef void (*hpet_timer0_timeout_fn_t)(HPETState *h);
+
+static hpet_timer0_timeout_fn_t hpet_timer0_timeout[HPET_DEL_POLICY_NUMS] = 
{hpet_timer0_timeout_missed_ticks,
+                                                                     
hpet_timer0_timeout_not_missed_ticks};
 static void hpet_timer_fn(void *opaque)
 {
     struct HPET_timer_fn_info *htfi = opaque;
@@ -424,19 +679,25 @@
 
     if ( timer_is_periodic(h, tn) && (h->hpet.period[tn] != 0) )
     {
-        uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
-        if ( timer_is_32bit(h, tn) )
-        {
-            while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
-                h->hpet.timers[tn].cmp = (uint32_t)(
-                    h->hpet.timers[tn].cmp + period);
-        }
-        else
-        {
-            while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
-                h->hpet.timers[tn].cmp += period;
-        }
-        set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
+       if(!tn)
+           (*hpet_timer0_timeout[h->hpet.delivery_policy])(h);
+
+       else
+       {
+           uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
+           if ( timer_is_32bit(h, tn) )
+           {
+               while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
+                   h->hpet.timers[tn].cmp = (uint32_t)(
+                                                       h->hpet.timers[tn].cmp 
+ period);
+           }
+           else
+           {
+               while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
+                   h->hpet.timers[tn].cmp += period;
+           }
+           set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
+       }
     }
 
     spin_unlock(&h->lock);
@@ -462,7 +723,10 @@
     spin_lock(&hp->lock);
 
     /* Write the proper value into the main counter */
-    hp->hpet.mc64 = hp->mc_offset + guest_time_hpet(hp->vcpu);
+
+    hp->hpet.mc64 = hpet_read_maincounter(hp);
+    rdtscll(hp->hpet.migr_local_tsc);
+    hp->hpet.cpu_khz = cpu_khz;
 
     /* Save the HPET registers */
     rc = _hvm_init_entry(h, HVM_SAVE_CODE(HPET), 0, HVM_SAVE_LENGTH(HPET));
@@ -488,19 +752,70 @@
         C(period[0]);
         C(period[1]);
         C(period[2]);
+       C(vector[0]);
+       C(vector[1]);
+       C(vector[2]);
+       C(last_end_of_intr_mc);
+       C(end_of_intr_mc);
+       C(intr_pending_nr);
+       C(pending_mask);
+       C(delivery_policy);
+       C(phys_period);
+       C(cpu_khz);
+       C(migr_local_tsc);
+       C(intr_counts_last_s);
 #undef C
+       memcpy(rec->intr_counts, hp->hpet.intr_counts, 
sizeof(hp->hpet.intr_counts));
     }
 
     spin_unlock(&hp->lock);
 
     return rc;
 }
+static int hpet_debug_migr_check_period(struct domain *d, HPETState *hp)
+{
+    unsigned long period, m_period, delta;
+
+    period = read_hpet_period();
+    m_period = (hp->hpet.capability >> 32) & 0xffffffffUL;
+    delta = (period > m_period) ? (period - m_period) : (m_period - period);
+    if(delta) {
+       /* Some hpets report small differences in period. A difference of 1 has 
been seen.
+        * Allow 100 as that is still 0.00014%, which is small enough.
+        */
+       printk("hpet.capability 0x%lx ((hp->hpet.capability >> 32) & 
0xffffffffUL) 0x%lx period %lx\n",
+              hp->hpet.capability,
+              ((hp->hpet.capability >> 32) & 0xffffffffUL),
+              period);
+       if(delta > 100) {
+           printk("hpet period difference %ld too large\n", delta);
+           return 1;
+       }
+    }
+    return 0;
+}
+#define HPET_MIGR_TICK_ADJUSTMENT 1
+/*
+ * HPET_MIGR_TICK_ADJUSTMENT -
+ *   This corrects for some of the time between hpet save on the sending node
+ *   and hpet load on the receiving node. The correction has been found to be 
quite small,
+ *   300-400 usec. This adjustment is based on sending a final message in 
migrate with the
+ *   tsc at send time (last_tsc_sender) in the message. Upon reception the tsc 
(first_tsc_receiver)
+ *   is recorded. So the only time we are not taking into account is the time 
the message is in
+ *   transit.
+ *   The reason this adjustment is in here, given how small it is, is that 
there may be circumstances,
+ *   for example a node heavily loaded with other guests, where the adjustment 
would be significant.
+ */
 
 static int hpet_load(struct domain *d, hvm_domain_context_t *h)
 {
     HPETState *hp = &d->arch.hvm_domain.pl_time.vhpet;
     struct hvm_hw_hpet *rec;
     int i;
+#ifdef HPET_MIGR_TICK_ADJUSTMENT
+    unsigned long now, dt1, dt2, dt1ticks, dt2ticks, period;
+#endif
+  
 
     spin_lock(&hp->lock);
 
@@ -531,11 +846,50 @@
         C(period[0]);
         C(period[1]);
         C(period[2]);
+       C(vector[0]);
+       C(vector[1]);
+       C(vector[2]);
+       C(last_end_of_intr_mc);
+       C(end_of_intr_mc);
+       C(intr_pending_nr);
+       C(pending_mask);
+       C(delivery_policy);
+       C(phys_period);
+       C(cpu_khz);
+       C(migr_local_tsc);
+       C(intr_counts_last_s);
 #undef C
+
+       memcpy(hp->hpet.intr_counts, rec->intr_counts, 
sizeof(hp->hpet.intr_counts));
     
-    /* Recalculate the offset between the main counter and guest time */
-    hp->mc_offset = hp->hpet.mc64 - guest_time_hpet(hp->vcpu);
-                
+       /* Recalculate the offset between the main counter and guest time */
+
+       if(hpet_debug_migr_check_period(d, hp))
+           return -EINVAL;
+
+#ifdef HPET_MIGR_TICK_ADJUSTMENT
+       period = read_hpet_period();
+       rdtscll(now);
+       /*  dt1 is the time delta on the sending node between the sending of 
the last migrate message and the call to hpet_save. */
+       dt1 = ((d->last_tsc_sender - hp->hpet.migr_local_tsc) * 1000UL) / 
hp->hpet.cpu_khz;
+       dt1 = dt1 * 1000UL;
+       dt1ticks = hpet_phys_ns_to_ticks(dt1, period);
+
+       /*  dt2 is the time delta on the receiving node between now (hpet_load) 
and the reception of the last migrate message. */
+       dt2 = ((now - d->first_tsc_receiver) * 1000UL) / cpu_khz;
+       dt2 = dt2 * 1000UL;
+       dt2ticks = hpet_phys_ns_to_ticks(dt2, period);
+       hp->mc_offset = hp->hpet.mc64 + dt1ticks + dt2ticks - 
read_64_main_counter();
+#else
+       hp->mc_offset = hp->hpet.mc64 - read_64_main_counter();
+#endif
+
+       if(hp->hpet.delivery_policy == 
HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS)
+           hvm_register_intr_en_notif(d, hp->hpet.vector[0], 
HVM_INTR_EN_NOTIF_HPET_MISSED);
+       else if(hp->hpet.delivery_policy == 
HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS)
+           hvm_register_intr_en_notif(d, hp->hpet.vector[0], 
HVM_INTR_EN_NOTIF_HPET_NO_MISSED);
+
+                    
     /* Restart the timers */
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
         if ( hpet_enabled(hp) )
@@ -548,6 +902,17 @@
 
 HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, hpet_load, 1, HVMSR_PER_DOM);
 
+void hpet_notify_timer_mode(struct domain *d, uint64_t value)
+{
+    HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+
+    if(value == HVM_HPET_guest_computes_missed_ticks)
+       h->hpet.delivery_policy = HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS;
+    else if(value == HVM_HPET_guest_does_not_compute_missed_ticks)
+       h->hpet.delivery_policy = 
HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS;
+}
+
+
 void hpet_init(struct vcpu *v)
 {
     HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
@@ -557,18 +922,22 @@
 
     spin_lock_init(&h->lock);
 
+    if(hpet_physical_inited)
+       printk("virtual hpet_init: using physical hpet\n");
+    else
+        printk("virtual hpet_init: using simulated hpet\n");
+
     h->vcpu = v;
     h->stime_freq = S_TO_NS;
-
-    h->hpet_to_ns_scale = ((S_TO_NS * STIME_PER_HPET_TICK) << 10) / 
h->stime_freq;
-    h->hpet_to_ns_limit = ~0ULL / h->hpet_to_ns_scale;
+    h->hpet.phys_period = read_hpet_period();
 
     /* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */
     h->hpet.capability = 0x8086A201ULL;
 
     /* This is the number of femptoseconds per HPET tick. */
     /* Here we define HPET's frequency to be 1/16 of Xen system time */
-    h->hpet.capability |= ((S_TO_FS*STIME_PER_HPET_TICK/h->stime_freq) << 32);
+
+    h->hpet.capability |= read_hpet_period() << 32;
 
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
     {
@@ -577,6 +946,7 @@
         h->hpet.timers[i].cmp = ~0ULL;
         h->timer_fn_info[i].hs = h;
         h->timer_fn_info[i].tn = i;
+       h->hpet.vector[i] = ~0U;
         init_timer(&h->timers[i], hpet_timer_fn, &h->timer_fn_info[i],
                    v->processor);
     }
@@ -590,7 +960,6 @@
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
         kill_timer(&h->timers[i]);
 }
-
 void hpet_reset(struct domain *d)
 {
     hpet_deinit(d);
diff -r ec3493b63170 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -458,6 +458,8 @@
         ctxt.dr6 = vc->debugreg[6];
         ctxt.dr7 = vc->debugreg[7];
 
+        ctxt.int_notif = v->int_notif;
+
         if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
             return 1; 
     }
@@ -612,6 +614,9 @@
 
     vc->flags = VGCF_online;
     v->fpu_initialised = 1;
+
+    v->int_notif = ctxt.int_notif;
+
 
     /* Auxiliary processors should be woken immediately. */
     if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
@@ -2382,8 +2387,9 @@
                 hvm_latch_shinfo_size(d);
                 break;
             case HVM_PARAM_TIMER_MODE:
-                if ( a.value > HVMPTM_one_missed_tick_pending )
+                if ( a.value > HVM_HPET_guest_does_not_compute_missed_ticks )
                     rc = -EINVAL;
+                hpet_notify_timer_mode(d, a.value);
                 break;
             case HVM_PARAM_MIG_LAST_TSC:
                 d->last_tsc_sender = a.value;
@@ -2574,7 +2580,98 @@
 
     return rc;
 }
+/* Interrupt inject and completion notification facility.
+ * Register 'notif_fn', to be called whenever 'vector' is injected (post = 1) 
or 
+ * completed (post = 0). Here completed is when the guest re-enables 
interrupts.
+ */
 
+
+
+notif_fn_t hvm_intr_en_notif_fn[HVM_INTR_EN_NOTIF_MAX+1] = {(notif_fn_t)0, 
hpet_intr_en_fn_missed, hpet_intr_en_fn_no_missed};
+int hvm_register_intr_en_notif(struct domain *d, unsigned int notif_vector, 
int notif_fn_index)
+{
+    intr_en_notif_t *entry;
+    struct list_head *cur;
+    intr_en_notif_t *cur_entry;
+
+    entry = xmalloc(struct intr_en_notif);
+    entry->notif_vector = notif_vector;
+    entry->notif_fn = notif_fn_index;
+    if(!entry) {
+        printk("hvm_register_intr_en_notif: xmalloc failed\n");
+        return 1;
+    }
+    spin_lock(&d->intr_en_notif_lock);
+    list_for_each(cur, &d->intr_en_notif_list) {
+        cur_entry = list_entry(cur, struct intr_en_notif, links);
+        if(cur_entry->notif_vector == notif_vector) {
+            list_del(cur);
+            xfree(cur_entry);
+            break;
+        }
+    }
+    list_add_tail(&entry->links, &d->intr_en_notif_list);
+    set_bit(notif_vector, &d->intr_en_notif_bitmap);    
+    spin_unlock(&d->intr_en_notif_lock);
+    return 0;
+}
+int hvm_unregister_intr_en_notif(struct domain *d, unsigned int notif_vector)
+{
+    struct list_head *cur;
+    intr_en_notif_t *cur_entry;
+
+    spin_lock(&d->intr_en_notif_lock);
+    clear_bit(notif_vector, &d->intr_en_notif_bitmap);
+    list_for_each(cur, &d->intr_en_notif_list) {
+        cur_entry = list_entry(cur, struct intr_en_notif, links);
+        if(cur_entry->notif_vector == notif_vector) {
+            list_del(cur);
+            xfree(cur_entry);
+            break;
+        }
+    }
+    spin_unlock(&d->intr_en_notif_lock);
+    return 0;
+}
+void hvm_intr_en_notif_arm(struct vcpu *v, unsigned int vector)
+{
+    struct list_head *cur;
+    intr_en_notif_t *cur_entry;
+    struct domain *d = v->domain;
+
+    if(test_bit(vector, &d->intr_en_notif_bitmap)) {
+        vcpu_intr_en_notif_t *vi = &v->int_notif;
+
+        spin_lock(&d->intr_en_notif_lock);
+        list_for_each(cur, &d->intr_en_notif_list) {
+            cur_entry = list_entry(cur, struct intr_en_notif, links);
+            if(cur_entry->notif_vector == vector) {
+                vi->intr_en_notif_fn = cur_entry->notif_fn;
+                vi->intr_en_notif_vec = vector;
+                vi->intr_en_notif_state = 0;
+                hvm_intr_en_notif_fn[vi->intr_en_notif_fn](v, 
vi->intr_en_notif_vec, 1);
+                break;
+            }
+        }
+        spin_unlock(&d->intr_en_notif_lock);
+    }
+    return;
+}
+
+void hvm_intr_en_notif_disarm(struct vcpu *v, int irq_masked)
+{
+    if(v->int_notif.intr_en_notif_fn) {
+        vcpu_intr_en_notif_t *vi = &v->int_notif;
+
+        if(vi->intr_en_notif_state && !(irq_masked || vlapic_tpr_gte_vec(v, 
vi->intr_en_notif_vec))) {
+            hvm_intr_en_notif_fn[vi->intr_en_notif_fn](v, 
vi->intr_en_notif_vec, 0);
+            vi->intr_en_notif_fn = 0;
+            vi->intr_en_notif_state = 0;
+        }
+        else if (irq_masked || vlapic_tpr_gte_vec(v, vi->intr_en_notif_vec))
+            vi->intr_en_notif_state = 1;
+    }
+}
 /*
  * Local variables:
  * mode: C
diff -r ec3493b63170 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -88,11 +88,35 @@
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
+void hvm_isa_irq_assert_cb(
+                          struct domain *d, unsigned int isa_irq,
+                          void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t 
intrs_delivered),
+                          uint64_t cb_arg)
+{
+    struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+    unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
+
+    ASSERT(isa_irq <= 15);
+
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+
+    if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) &&
+         (hvm_irq->gsi_assert_count[gsi]++ == 0) )
+    {
+       vioapic_register_delivered_cb(d, intrs_delivered_cb, cb_arg);  
+        vioapic_irq_positive_edge(d, gsi);
+        vpic_irq_positive_edge(d, isa_irq);
+    }
+
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+}
 void hvm_isa_irq_assert(
     struct domain *d, unsigned int isa_irq)
 {
     struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
 
     ASSERT(isa_irq <= 15);
 
diff -r ec3493b63170 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c
+++ b/xen/arch/x86/hvm/svm/intr.c
@@ -148,7 +148,10 @@
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     struct hvm_intack intack;
 
+    hvm_intr_en_notif_disarm(v, irq_masked(vmcb->rflags));
+
     /* Crank the handle on interrupt state. */
+
     pt_update_irq(v);
     svm_dirq_assist(v);
 
@@ -177,7 +180,6 @@
             enable_intr_window(v, intack);
             return;
         }
-
         intack = hvm_vcpu_ack_pending_irq(v, intack);
     } while ( intack.source == hvm_intsrc_none );
 
@@ -189,6 +191,7 @@
     {
         HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
         svm_inject_extint(v, intack.vector);
+        hvm_intr_en_notif_arm(v, intack.vector);
         pt_intr_post(v, intack);
     }
 
diff -r ec3493b63170 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -306,7 +306,18 @@
     return pt_active(&pit->pt0);
 }
 
-static void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq)
+void vioapic_register_delivered_cb(struct domain *d, void 
(*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+                                  uint64_t cb_arg)
+{
+    struct hvm_hw_vioapic *vioapic = domain_vioapic(d);
+
+    ASSERT(spin_is_locked(&vioapic_domain(vioapic)->arch.hvm_domain.irq_lock));
+
+    vioapic->intrs_delivered_cb = intrs_delivered_cb;
+    vioapic->cb_arg = cb_arg;
+}
+
+void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq)
 {
     uint16_t dest = vioapic->redirtbl[irq].fields.dest_id;
     uint8_t dest_mode = vioapic->redirtbl[irq].fields.dest_mode;
@@ -314,6 +325,7 @@
     uint8_t vector = vioapic->redirtbl[irq].fields.vector;
     uint8_t trig_mode = vioapic->redirtbl[irq].fields.trig_mode;
     uint32_t deliver_bitmask;
+    uint32_t deliver_bitmask_final = (uint32_t)0;
     struct vlapic *target;
     struct vcpu *v;
 
@@ -348,6 +360,11 @@
                                       vector, deliver_bitmask);
         if ( target != NULL )
         {
+           set_bit(vlapic_vcpu(target)->vcpu_id, &deliver_bitmask_final);
+           if(vioapic->intrs_delivered_cb) {
+               (*vioapic->intrs_delivered_cb)(vioapic->cb_arg, 
deliver_bitmask_final);
+               vioapic->intrs_delivered_cb = (void *)0;
+           }
             ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
         }
         else
@@ -362,25 +379,23 @@
     case dest_Fixed:
     {
         uint8_t bit;
+       if(vioapic->intrs_delivered_cb) {
+           (*vioapic->intrs_delivered_cb)(vioapic->cb_arg, deliver_bitmask);
+           vioapic->intrs_delivered_cb = 0;
+       }
         for ( bit = 0; deliver_bitmask != 0; bit++ )
         {
             if ( !(deliver_bitmask & (1 << bit)) )
                 continue;
             deliver_bitmask &= ~(1 << bit);
-#ifdef IRQ0_SPECIAL_ROUTING
-            /* Do not deliver timer interrupts to VCPU != 0 */
-            if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() )
-                v = vioapic_domain(vioapic)->vcpu[0];
-            else
-#endif
-                v = vioapic_domain(vioapic)->vcpu[bit];
-            if ( v != NULL )
-            {
-                target = vcpu_vlapic(v);
-                ioapic_inj_irq(vioapic, target, vector,
-                               trig_mode, delivery_mode);
-            }
-        }
+           v = vioapic_domain(vioapic)->vcpu[bit];
+           /* ioapic_get_delivery_bitmask should only set bits for existing
+            * vcpus, so v is not expected to be NULL; keep the check as a
+            * cheap safeguard. */
+           if( v != NULL ) {
+               target = vcpu_vlapic(v);
+               ioapic_inj_irq(vioapic, target, vector,
+                              trig_mode, delivery_mode);
+           }
+       }   
         break;
     }
 
@@ -404,6 +419,23 @@
                  delivery_mode);
         break;
     }
+}
+
+/*
+ * Fetch the vector programmed on the IO-APIC pin routed from @isa_irq.
+ * Returns 0 and stores the vector in *vector when the pin is unmasked;
+ * returns 1 (leaving *vector untouched) when the pin is masked.
+ */
+int vioapic_get_vector(struct domain *d, unsigned int isa_irq,
+                       unsigned int *vector)
+{
+    struct hvm_hw_vioapic *vioapic = domain_vioapic(d);
+    union vioapic_redir_entry *ent;
+    int ret = 1;
+    unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+    ent = &vioapic->redirtbl[gsi];
+    if ( !ent->fields.mask )
+    {
+        *vector = ent->fields.vector;
+        ret = 0;
+    }
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+    return ret;
 }
 
 void vioapic_irq_positive_edge(struct domain *d, unsigned int irq)
diff -r ec3493b63170 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -113,7 +113,6 @@
 /*
  * IRR-specific bitmap update & search routines.
  */
-
 static int vlapic_test_and_set_irr(int vector, struct vlapic *vlapic)
 {
     return vlapic_test_and_set_vector(vector, &vlapic->regs->data[APIC_IRR]);
@@ -165,6 +164,12 @@
                 vlapic, ppr, isr, isrv);
 
     return ppr;
+}
+
+/*
+ * Return true when the priority class of @vector (bits 7:4) is less than
+ * or equal to the vCPU's current processor priority (vlapic_get_ppr()).
+ * NOTE(review): the name says TPR but the comparison is against the PPR --
+ * confirm that is the intended register.
+ */
+bool_t vlapic_tpr_gte_vec(struct vcpu *v, int vector)
+{
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    return ((vector & 0xf0) <= vlapic_get_ppr(vlapic));
 }
 
 int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda)
diff -r ec3493b63170 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -163,6 +163,10 @@
     struct vcpu *v = current;
     unsigned int tpr_threshold = 0;
     enum hvm_intblk intblk;
+    unsigned long eflags;
+
+    eflags = __vmread(GUEST_RFLAGS);
+    hvm_intr_en_notif_disarm(v, irq_masked(eflags));
 
     /* Crank the handle on interrupt state. */
     pt_update_irq(v);
@@ -200,6 +204,7 @@
     {
         HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
         vmx_inject_extint(v, intack.vector);
+        hvm_intr_en_notif_arm(v, intack.vector);
         pt_intr_post(v, intack);
     }
 
diff -r ec3493b63170 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -36,6 +36,7 @@
 string_param("clocksource", opt_clocksource);
 
 #define EPOCH MILLISECS(1000)
+#define HPET_PERIOD_SIMULATED 0x429b17fUL
 
 unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
 DEFINE_SPINLOCK(rtc_lock);
@@ -348,6 +349,7 @@
     return hpet_read32(HPET_COUNTER);
 }
 
+int hpet_physical_inited = 0;
 static int init_hpet(struct platform_timesource *pts)
 {
     u64 hpet_rate = hpet_setup();
@@ -359,6 +361,8 @@
     pts->frequency = hpet_rate;
     pts->read_counter = read_hpet_count;
     pts->counter_bits = 32;
+
+    hpet_physical_inited = 1;
 
     return 1;
 }
@@ -500,6 +504,64 @@
 
     return stime;
 }
+/* "hpet_avoid" boot option: synthesise the HPET main counter from the TSC
+ * instead of reading the hardware counter (see read_64_main_counter). */
+static int hpet_main_counter_phys_avoid_hdw = 0;
+boolean_param("hpet_avoid", hpet_main_counter_phys_avoid_hdw);
+static unsigned long hpet_main_counter_phys_avoid_hdw_period;
+/* period is presumably in femtoseconds per tick (10^6 fs = 1 ns) -- the
+ * usual HPET_PERIOD units; confirm against read_hpet_period() users.
+ * No trailing semicolons: a stray ';' in a function-like macro breaks any
+ * use inside a larger expression. */
+#define hpet_phys_ns_to_ticks(ns, period) hpet_mult_div(ns, 1000000UL, period)
+#define hpet_tick_to_ns(tick, period) hpet_mult_div(tick, period, 1000000UL)
+#define TSC_TO_NSEC(tsc) hpet_mult_div(tsc, 1000000UL, cpu_khz)
+
+/*
+ * State used to keep the TSC-derived time value monotonic across callers:
+ * last_ret records the largest value handed out so far (updated under lock
+ * in read_64_main_counter).
+ */
+typedef struct {
+    spinlock_t lock;     /* protects last_ret */
+    s_time_t last_ret;   /* highest time value returned so far */
+} get_s_time_mono_t;
+
+static get_s_time_mono_t get_s_time_mon;
+
+/* One-time lock setup; invoked from platform timer initialisation. */
+static void get_s_time_mono_init(void)
+{
+    spin_lock_init(&get_s_time_mon.lock);
+}
+
+/*
+ * Return a 64-bit HPET main counter value.
+ *
+ * When the "hpet_avoid" option is set, or no physical HPET was found, the
+ * counter is synthesised from the local TSC-derived system time, clamped via
+ * get_s_time_mon.last_ret so successive calls never go backwards.
+ * Otherwise the platform timer's extended (64-bit) counter is returned.
+ */
+u64 read_64_main_counter(void)
+{
+    u64 count;
+    unsigned long flags;
+    struct cpu_time *t = &this_cpu(cpu_time);
+    u64 tsc, delta;
+    s_time_t now;
+
+    if ( hpet_main_counter_phys_avoid_hdw || !hpet_physical_inited )
+    {
+        /* NOTE(review): this path takes the lock without disabling irqs,
+         * unlike the platform-timer path below -- confirm it is never
+         * reached from irq context. */
+        spin_lock(&get_s_time_mon.lock);
+        rdtscll(tsc);
+        delta = tsc - t->local_tsc_stamp;
+        now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+        /* Enforce monotonicity of the returned time value. */
+        if ( now > get_s_time_mon.last_ret )
+            get_s_time_mon.last_ret = now;
+        else
+            now = get_s_time_mon.last_ret;
+        spin_unlock(&get_s_time_mon.lock);
+        /* Lazily cache the HPET period; idempotent, so a race is harmless. */
+        if ( !hpet_main_counter_phys_avoid_hdw_period )
+            hpet_main_counter_phys_avoid_hdw_period = read_hpet_period();
+        count = hpet_phys_ns_to_ticks(now,
+                                      hpet_main_counter_phys_avoid_hdw_period);
+    }
+    else
+    {
+        spin_lock_irqsave(&platform_timer_lock, flags);
+        count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) &
+                               plt_mask);
+        spin_unlock_irqrestore(&platform_timer_lock, flags);
+    }
+    return count;
+}
+
+/*
+ * Return the HPET tick period: the hardware HPET_PERIOD register when a
+ * physical HPET was initialised, else the fixed HPET_PERIOD_SIMULATED value.
+ */
+u64 read_hpet_period(void)
+{
+    unsigned long period;
+
+    if ( hpet_physical_inited )
+        period = (unsigned long)hpet_read32(HPET_PERIOD);
+    else
+        period = HPET_PERIOD_SIMULATED;
+    return period;
+}
 
 static void platform_time_calibration(void)
 {
@@ -559,6 +621,7 @@
     plt_overflow(NULL);
 
     platform_timer_stamp = plt_stamp64;
+    get_s_time_mono_init();
 
     printk("Platform timer is %s %s\n",
            freq_string(pts->frequency), pts->name);
diff -r ec3493b63170 xen/common/domain.c
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -96,6 +96,8 @@
     spin_lock_init(&d->hypercall_deadlock_mutex);
     INIT_LIST_HEAD(&d->page_list);
     INIT_LIST_HEAD(&d->xenpage_list);
+    spin_lock_init(&d->intr_en_notif_lock);
+    INIT_LIST_HEAD(&d->intr_en_notif_list);
 
     return d;
 }
diff -r ec3493b63170 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -23,6 +23,7 @@
 
 #include <asm/current.h>
 #include <asm/x86_emulate.h>
+#include <asm/processor.h>
 #include <public/domctl.h>
 #include <public/hvm/save.h>
 
@@ -218,10 +219,20 @@
     hvm_funcs.set_segment_register(v, seg, reg);
 }
 
+/* Nonzero iff EFLAGS.IF is clear, i.e. maskable interrupts are disabled. */
+static inline int irq_masked(unsigned long eflags)
+{
+    return ((eflags & X86_EFLAGS_IF) == 0);
+}
+
 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                    unsigned int *ecx, unsigned int *edx);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
+
+int hvm_register_intr_en_notif(struct domain *d, unsigned int notif_vector, 
int notif_fn_index);
+int hvm_unregister_intr_en_notif(struct domain *d, unsigned int notif_vector);
+void hvm_intr_en_notif_arm(struct vcpu *v, unsigned int vector);
+void hvm_intr_en_notif_disarm(struct vcpu *v, int irq_masked);
 
 static inline void
 hvm_inject_exception(unsigned int trapnr, int errcode, unsigned long cr2)
diff -r ec3493b63170 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -160,6 +160,10 @@
     struct domain *d, unsigned int isa_irq);
 void hvm_isa_irq_deassert(
     struct domain *d, unsigned int isa_irq);
+void hvm_isa_irq_assert_cb(
+                          struct domain *d, unsigned int isa_irq,
+                          void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t 
intrs_delivered),
+                          uint64_t cb_arg);
 
 void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
 
diff -r ec3493b63170 xen/include/asm-x86/hvm/vioapic.h
--- a/xen/include/asm-x86/hvm/vioapic.h
+++ b/xen/include/asm-x86/hvm/vioapic.h
@@ -66,5 +66,7 @@
 void vioapic_reset(struct domain *d);
 void vioapic_irq_positive_edge(struct domain *d, unsigned int irq);
 void vioapic_update_EOI(struct domain *d, int vector);
-
+int vioapic_get_vector(struct domain *d, unsigned int isa_irq, unsigned int 
*vector);
+void vioapic_register_delivered_cb(struct domain *d, void 
(*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+                                  uint64_t cb_arg);
 #endif /* __ASM_X86_HVM_VIOAPIC_H__ */
diff -r ec3493b63170 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -98,4 +98,6 @@
 
 int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda);
 
+bool_t vlapic_tpr_gte_vec(struct vcpu *v, int vector);
+
 #endif /* __ASM_X86_HVM_VLAPIC_H__ */
diff -r ec3493b63170 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -50,23 +50,45 @@
         uint64_t fsb;           /* FSB route, not supported now */
     } timers[HPET_TIMER_NUM];
 
-    /* Hidden register state */
+    /* The rest of this struct is hidden register state */
+
+    /* Per timer state */
     uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+    uint32_t vector[HPET_TIMER_NUM];
+
+    /* Timer 0 (clock) specific state */
+ 
+    uint64_t last_end_of_intr_mc;
+    uint64_t end_of_intr_mc;
+    uint64_t intr_pending_nr;
+    uint64_t pending_mask;
+    uint32_t delivery_policy;
+ 
+    /* Global state */
+ 
+    uint64_t phys_period;
+    uint64_t cpu_khz;
+    uint64_t migr_local_tsc;
+ 
+    /* Debug */
+ 
+    uint64_t intr_counts[INTR_CNT_BUCKETS];
+    uint64_t intr_counts_last_s;
 };
 
 typedef struct HPETState {
     struct hpet_registers hpet;
     struct vcpu *vcpu;
     uint64_t stime_freq;
-    uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
-    uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns      */
     uint64_t mc_offset;
     struct timer timers[HPET_TIMER_NUM];
     struct HPET_timer_fn_info timer_fn_info[HPET_TIMER_NUM]; 
     spinlock_t lock;
 } HPETState;
 
-
+void hpet_intr_en_fn_missed(struct vcpu *v, unsigned int vector, unsigned int 
post);
+void hpet_intr_en_fn_no_missed(struct vcpu *v, unsigned int vector, unsigned 
int post);
+void hpet_notify_timer_mode(struct domain *d, uint64_t value);
 /*
  * Abstract layer of periodic time, one short time.
  */
diff -r ec3493b63170 xen/include/public/arch-x86/hvm/save.h
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -47,6 +47,12 @@
 /*
  * Processor
  */
+
+typedef struct vcpu_intr_en_notif {
+    int intr_en_notif_fn;
+    int intr_en_notif_state;
+    unsigned int intr_en_notif_vec;
+} vcpu_intr_en_notif_t;
 
 struct hvm_hw_cpu {
     uint8_t  fpu_regs[512];
@@ -156,6 +162,7 @@
     };
     /* error code for pending event */
     uint32_t error_code;
+    vcpu_intr_en_notif_t int_notif;
 };
 
 DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu);
@@ -253,6 +260,8 @@
 #endif
         } fields;
     } redirtbl[VIOAPIC_NUM_PINS];
+    void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered);
+    uint64_t cb_arg;
 };
 
 DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
@@ -366,6 +375,11 @@
  * HPET
  */
 
+#define HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS 0 /* Linux */
+#define HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS 1 /* Windows */
+#define HPET_DEL_POLICY_NUMS 2
+#define INTR_CNT_BUCKETS 20
+
 #define HPET_TIMER_NUM     3    /* 3 timers supported now */
 struct hvm_hw_hpet {
     /* Memory-mapped, software visible registers */
@@ -385,8 +399,30 @@
     } timers[HPET_TIMER_NUM];
     uint64_t res5[4*(24-HPET_TIMER_NUM)];  /* reserved, up to 0x3ff */
 
-    /* Hidden register state */
+    /* The rest of this struct is hidden register state */
+
+    /* Per timer state */
     uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+    uint32_t vector[HPET_TIMER_NUM];
+
+    /* Timer 0 (clock) specific state */
+ 
+    uint64_t last_end_of_intr_mc;
+    uint64_t end_of_intr_mc;
+    uint64_t intr_pending_nr;
+    uint64_t pending_mask;
+    uint32_t delivery_policy;
+ 
+    /* Global state */
+ 
+    uint64_t phys_period;
+    uint64_t cpu_khz;
+    uint64_t migr_local_tsc;
+ 
+    /* Debug */
+ 
+    uint64_t intr_counts[INTR_CNT_BUCKETS];
+    uint64_t intr_counts_last_s;
 };
 
 DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet);
diff -r ec3493b63170 xen/include/public/hvm/params.h
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -80,6 +80,8 @@
 #define HVMPTM_no_delay_for_missed_ticks 1
 #define HVMPTM_no_missed_ticks_pending   2
 #define HVMPTM_one_missed_tick_pending   3
+#define HVM_HPET_guest_computes_missed_ticks  4
+#define HVM_HPET_guest_does_not_compute_missed_ticks  5
 
 /* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
 #define HVM_PARAM_HPET_ENABLED 11
diff -r ec3493b63170 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -69,6 +69,19 @@
 int  evtchn_init(struct domain *d);
 void evtchn_destroy(struct domain *d);
 
+typedef void (*notif_fn_t)(struct vcpu *v, unsigned int vector, unsigned int 
post);
+
+#define HVM_INTR_EN_NOTIF_UNUSED 0
+#define HVM_INTR_EN_NOTIF_HPET_MISSED 1
+#define HVM_INTR_EN_NOTIF_HPET_NO_MISSED 2
+#define HVM_INTR_EN_NOTIF_MAX 2
+
+typedef struct intr_en_notif {
+    struct list_head links;
+    unsigned int notif_vector;
+    int notif_fn;
+} intr_en_notif_t;
+
 struct vcpu 
 {
     int              vcpu_id;
@@ -135,6 +148,7 @@
     cpumask_t        vcpu_dirty_cpumask;
 
     struct arch_vcpu arch;
+    vcpu_intr_en_notif_t int_notif;
 };
 
 /* Per-domain lock can be recursively acquired in fault handlers. */
@@ -232,6 +246,10 @@
     int32_t time_offset_seconds;
 
     struct rcu_head rcu;
+ 
+    spinlock_t intr_en_notif_lock;
+    unsigned long intr_en_notif_bitmap[(MAX_VECTOR/sizeof(unsigned long))+1];
+    struct list_head intr_en_notif_list;
 
     unsigned long last_tsc_sender;
     unsigned long first_tsc_receiver;
@@ -508,6 +526,18 @@
     if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
         vcpu_wake(v);
 }
+
+/*
+ * Compute (var * num) / den without forming the full var * num product,
+ * which may overflow 64 bits: split var into its den-quotient and
+ * remainder so only q * num and r * num (with r < den) are computed.
+ * Callers must still ensure q * num and r * num fit in 64 bits.
+ */
+static inline uint64_t hpet_mult_div(uint64_t var, uint64_t num, uint64_t den)
+{
+    uint64_t q = var / den;
+    uint64_t r = var % den;
+
+    return (q * num) + ((r * num) / den);
+}
 
 #define IS_PRIV(_d) ((_d)->is_privileged)
 #define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == 
(_t)))
diff -r ec3493b63170 xen/include/xen/time.h
--- a/xen/include/xen/time.h
+++ b/xen/include/xen/time.h
@@ -61,6 +61,11 @@
 
 extern void send_timer_event(struct vcpu *v);
 
+u64 read_64_main_counter(void);
+u64 read_hpet_period(void);
+
+extern int hpet_physical_inited;
+
 #endif /* __XEN_TIME_H__ */
 
 /*
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH 2/2] Improve hpet accuracy, Ben Guthro <=