On Fri, 7 May 2010, Zhang, Xiantao wrote:
>
> >>>
> >>> Also there are two gtsc_khz AFAICT: d->arch.tsc_khz and
> >>> d->arch.hvm_domain.gtsc_khz, why hvm guests have their own separate
> >>> record of the guest tsc frequency, when they never actually change
> >>> it?
>
> The latter one is only used for save/restore and migration to record the
> guest's expected TSC frequency. Thanks!
> Xiantao
>
This is my proposed fix: I am removing the tsc_scaled variable that is
never actually used because when tsc needs to be scaled vtsc is 1.
I am also making this more explicit in tsc_set_info.
I am also removing hvm_domain.gtsc_khz that is a duplicate of
d->arch.tsc_khz.
I am using scale_delta(delta, &d->arch.ns_to_vtsc) to scale the tsc
value before returning it to the guest like in the pv case.
I added a feature flag to specify that the pvclock algorithm is safe to
be used in an HVM guest so that the guest can now use it without
hanging.
I think this patch catches all possible cases, but I would like some
review to be sure.
Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
---
diff -r d1c245c1c976 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Fri May 07 16:03:10 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c Fri May 07 19:54:32 2010 +0100
@@ -153,32 +153,6 @@
hvm_funcs.set_rdtsc_exiting(v, enable);
}
-int hvm_gtsc_need_scale(struct domain *d)
-{
- uint32_t gtsc_mhz, htsc_mhz;
-
- if ( d->arch.vtsc )
- return 0;
-
- gtsc_mhz = d->arch.hvm_domain.gtsc_khz / 1000;
- htsc_mhz = (uint32_t)cpu_khz / 1000;
-
- d->arch.hvm_domain.tsc_scaled = (gtsc_mhz && (gtsc_mhz != htsc_mhz));
- return d->arch.hvm_domain.tsc_scaled;
-}
-
-static u64 hvm_h2g_scale_tsc(struct vcpu *v, u64 host_tsc)
-{
- uint32_t gtsc_khz, htsc_khz;
-
- if ( !v->domain->arch.hvm_domain.tsc_scaled )
- return host_tsc;
-
- htsc_khz = cpu_khz;
- gtsc_khz = v->domain->arch.hvm_domain.gtsc_khz;
- return muldiv64(host_tsc, gtsc_khz, htsc_khz);
-}
-
void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
{
uint64_t tsc;
@@ -186,11 +160,11 @@
if ( v->domain->arch.vtsc )
{
tsc = hvm_get_guest_time(v);
+ tsc = gtime_to_gtsc(v->domain, tsc);
}
else
{
rdtscll(tsc);
- tsc = hvm_h2g_scale_tsc(v, tsc);
}
v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - tsc;
@@ -204,12 +178,12 @@
if ( v->domain->arch.vtsc )
{
tsc = hvm_get_guest_time(v);
+ tsc = gtime_to_gtsc(v->domain, tsc);
v->domain->arch.vtsc_kerncount++;
}
else
{
rdtscll(tsc);
- tsc = hvm_h2g_scale_tsc(v, tsc);
}
return tsc + v->arch.hvm_vcpu.cache_tsc_offset;
diff -r d1c245c1c976 xen/arch/x86/hvm/save.c
--- a/xen/arch/x86/hvm/save.c Fri May 07 16:03:10 2010 +0100
+++ b/xen/arch/x86/hvm/save.c Fri May 07 19:54:32 2010 +0100
@@ -33,7 +33,7 @@
hdr->cpuid = eax;
/* Save guest's preferred TSC. */
- hdr->gtsc_khz = d->arch.hvm_domain.gtsc_khz;
+ hdr->gtsc_khz = d->arch.tsc_khz;
}
int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -62,8 +62,8 @@
/* Restore guest's preferred TSC frequency. */
if ( hdr->gtsc_khz )
- d->arch.hvm_domain.gtsc_khz = hdr->gtsc_khz;
- if ( hvm_gtsc_need_scale(d) )
+ d->arch.tsc_khz = hdr->gtsc_khz;
+ if ( d->arch.vtsc )
{
hvm_set_rdtsc_exiting(d, 1);
gdprintk(XENLOG_WARNING, "Domain %d expects freq %uMHz "
diff -r d1c245c1c976 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c Fri May 07 16:03:10 2010 +0100
+++ b/xen/arch/x86/hvm/vpt.c Fri May 07 19:54:32 2010 +0100
@@ -32,9 +32,6 @@
spin_lock_init(&pl->pl_time_lock);
pl->stime_offset = -(u64)get_s_time();
pl->last_guest_time = 0;
-
- d->arch.hvm_domain.gtsc_khz = cpu_khz;
- d->arch.hvm_domain.tsc_scaled = 0;
}
u64 hvm_get_guest_time(struct vcpu *v)
diff -r d1c245c1c976 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Fri May 07 16:03:10 2010 +0100
+++ b/xen/arch/x86/time.c Fri May 07 19:54:32 2010 +0100
@@ -828,8 +828,13 @@
if ( d->arch.vtsc )
{
- u64 delta = max_t(s64, t->stime_local_stamp - d->arch.vtsc_offset, 0);
- tsc_stamp = scale_delta(delta, &d->arch.ns_to_vtsc);
+ u64 stime = t->stime_local_stamp;
+ if ( is_hvm_domain(d) )
+ {
+ struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
+ stime += pl->stime_offset + v->arch.hvm_vcpu.stime_offset;
+ }
+ tsc_stamp = gtime_to_gtsc(d, stime);
}
else
{
@@ -852,6 +857,8 @@
_u.tsc_to_system_mul = t->tsc_scale.mul_frac;
_u.tsc_shift = (s8)t->tsc_scale.shift;
}
+ if ( is_hvm_domain(d) )
+ _u.tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset;
/* Don't bother unless timestamp record has changed or we are forced. */
_u.version = u->version; /* make versions match for memcmp test */
@@ -1618,11 +1625,18 @@
* PV SoftTSC Emulation.
*/
+u64 gtime_to_gtsc(struct domain *d, u64 tsc)
+{
+ if ( !is_hvm_domain(d) )
+ tsc = max_t(s64, tsc - d->arch.vtsc_offset, 0);
+ return scale_delta(tsc, &d->arch.ns_to_vtsc);
+}
+
void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp)
{
s_time_t now = get_s_time();
struct domain *d = v->domain;
- u64 delta;
+ u64 tsc;
spin_lock(&d->arch.vtsc_lock);
@@ -1638,8 +1652,7 @@
spin_unlock(&d->arch.vtsc_lock);
- delta = max_t(s64, now - d->arch.vtsc_offset, 0);
- now = scale_delta(delta, &d->arch.ns_to_vtsc);
+ tsc = gtime_to_gtsc(d, now);
regs->eax = (uint32_t)now;
regs->edx = (uint32_t)(now >> 32);
@@ -1780,8 +1793,10 @@
d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
d->arch.tsc_khz = gtsc_khz ? gtsc_khz : cpu_khz;
set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
- /* use native TSC if initial host has safe TSC and not migrated yet */
- if ( host_tsc_is_safe() && incarnation == 0 )
+ /* use native TSC if initial host has safe TSC, has not migrated
+ * yet and tsc_khz == cpu_khz */
+ if ( host_tsc_is_safe() && incarnation == 0 &&
+ d->arch.tsc_khz == cpu_khz )
d->arch.vtsc = 0;
else
d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns);
@@ -1806,7 +1821,7 @@
}
d->arch.incarnation = incarnation + 1;
if ( is_hvm_domain(d) )
- hvm_set_rdtsc_exiting(d, d->arch.vtsc || hvm_gtsc_need_scale(d));
+ hvm_set_rdtsc_exiting(d, d->arch.vtsc);
}
/* vtsc may incur measurable performance degradation, diagnose with this */
diff -r d1c245c1c976 xen/common/kernel.c
--- a/xen/common/kernel.c Fri May 07 16:03:10 2010 +0100
+++ b/xen/common/kernel.c Fri May 07 19:54:32 2010 +0100
@@ -244,7 +244,8 @@
(1U << XENFEAT_highmem_assist) |
(1U << XENFEAT_gnttab_map_avail_bits);
else
- fi.submap |= (1U << XENFEAT_hvm_callback_vector);
+ fi.submap |= (1U << XENFEAT_hvm_callback_vector) |
+ (1U << XENFEAT_hvm_safe_pvclock);
#endif
break;
default:
diff -r d1c245c1c976 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h Fri May 07 16:03:10 2010 +0100
+++ b/xen/include/asm-x86/hvm/domain.h Fri May 07 19:54:32 2010 +0100
@@ -45,8 +45,6 @@
struct hvm_ioreq_page ioreq;
struct hvm_ioreq_page buf_ioreq;
- uint32_t gtsc_khz; /* kHz */
- bool_t tsc_scaled;
struct pl_time pl_time;
struct hvm_io_handler io_handler;
diff -r d1c245c1c976 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Fri May 07 16:03:10 2010 +0100
+++ b/xen/include/asm-x86/hvm/hvm.h Fri May 07 19:54:32 2010 +0100
@@ -295,7 +295,6 @@
uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2);
void hvm_set_rdtsc_exiting(struct domain *d, bool_t enable);
-int hvm_gtsc_need_scale(struct domain *d);
static inline int
hvm_cpu_prepare(unsigned int cpu)
diff -r d1c245c1c976 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h Fri May 07 16:03:10 2010 +0100
+++ b/xen/include/asm-x86/time.h Fri May 07 19:54:32 2010 +0100
@@ -60,6 +60,7 @@
uint64_t ns_to_acpi_pm_tick(uint64_t ns);
void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp);
+u64 gtime_to_gtsc(struct domain *d, u64 tsc);
void tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec,
uint32_t gtsc_khz, uint32_t incarnation);
diff -r d1c245c1c976 xen/include/public/features.h
--- a/xen/include/public/features.h Fri May 07 16:03:10 2010 +0100
+++ b/xen/include/public/features.h Fri May 07 19:54:32 2010 +0100
@@ -71,6 +71,9 @@
/* x86: Does this Xen host support the HVM callback vector type? */
#define XENFEAT_hvm_callback_vector 8
+/* x86: pvclock algorithm is safe to use on HVM */
+#define XENFEAT_hvm_safe_pvclock 9
+
#define XENFEAT_NR_SUBMAPS 1
#endif /* __XEN_PUBLIC_FEATURES_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|