How to handle the VM migration which occurs bettwen two different TSC frequency
machines ?
Xiantao
Stefano Stabellini wrote:
> On Fri, 7 May 2010, Zhang, Xiantao wrote:
>>
>>>>>
>>>>> Also there are two gtsc_khz AFAICT: d->arch.tsc_khz and
>>>>> d->arch.hvm_domain.gtsc_khz, why hvm guests have their own
>>>>> separate record of the guest tsc frequency, when they never
>>>>> actually change it?
>>
>> The latter one is only used for save/restore and migration to record
>> the guest's expected TSC frequency. Thanks! Xiantao
>>
>
> This is my proposed fix: I am removing the tsc_scaled variable that is
> never actually used because when tsc needs to be scaled vtsc is 1.
> I am also making this more explicit in tsc_set_info.
> I am also removing hvm_domain.gtsc_khz that is a duplicate of
> d->arch.tsc_khz.
> I am using scale_delta(delta, &d->arch.ns_to_vtsc) to scale the tsc
> value before returning it to the guest like in the pv case.
> I added a feature flag to specify that the pvclock algorithm is safe
> to
> be used in an HVM guest so that the guest can now use it without
> hanging.
>
> I think this patch catches all possible cases, but I would like some
> review to be sure.
>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
>
>
> ---
>
>
> diff -r d1c245c1c976 xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c Fri May 07 16:03:10 2010 +0100
> +++ b/xen/arch/x86/hvm/hvm.c Fri May 07 19:54:32 2010 +0100
> @@ -153,32 +153,6 @@
> hvm_funcs.set_rdtsc_exiting(v, enable);
> }
>
> -int hvm_gtsc_need_scale(struct domain *d)
> -{
> - uint32_t gtsc_mhz, htsc_mhz;
> -
> - if ( d->arch.vtsc )
> - return 0;
> -
> - gtsc_mhz = d->arch.hvm_domain.gtsc_khz / 1000;
> - htsc_mhz = (uint32_t)cpu_khz / 1000;
> -
> - d->arch.hvm_domain.tsc_scaled = (gtsc_mhz && (gtsc_mhz !=
> htsc_mhz));
> - return d->arch.hvm_domain.tsc_scaled;
> -}
> -
> -static u64 hvm_h2g_scale_tsc(struct vcpu *v, u64 host_tsc)
> -{
> - uint32_t gtsc_khz, htsc_khz;
> -
> - if ( !v->domain->arch.hvm_domain.tsc_scaled )
> - return host_tsc;
> -
> - htsc_khz = cpu_khz;
> - gtsc_khz = v->domain->arch.hvm_domain.gtsc_khz;
> - return muldiv64(host_tsc, gtsc_khz, htsc_khz);
> -}
> -
> void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
> {
> uint64_t tsc;
> @@ -186,11 +160,11 @@
> if ( v->domain->arch.vtsc )
> {
> tsc = hvm_get_guest_time(v);
> + tsc = gtime_to_gtsc(v->domain, tsc);
> }
> else
> {
> rdtscll(tsc);
> - tsc = hvm_h2g_scale_tsc(v, tsc);
> }
>
> v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - tsc;
> @@ -204,12 +178,12 @@
> if ( v->domain->arch.vtsc )
> {
> tsc = hvm_get_guest_time(v);
> + tsc = gtime_to_gtsc(v->domain, tsc);
> v->domain->arch.vtsc_kerncount++;
> }
> else
> {
> rdtscll(tsc);
> - tsc = hvm_h2g_scale_tsc(v, tsc);
> }
>
> return tsc + v->arch.hvm_vcpu.cache_tsc_offset;
> diff -r d1c245c1c976 xen/arch/x86/hvm/save.c
> --- a/xen/arch/x86/hvm/save.c Fri May 07 16:03:10 2010 +0100
> +++ b/xen/arch/x86/hvm/save.c Fri May 07 19:54:32 2010 +0100
> @@ -33,7 +33,7 @@
> hdr->cpuid = eax;
>
> /* Save guest's preferred TSC. */
> - hdr->gtsc_khz = d->arch.hvm_domain.gtsc_khz;
> + hdr->gtsc_khz = d->arch.tsc_khz;
> }
>
> int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
> @@ -62,8 +62,8 @@
>
> /* Restore guest's preferred TSC frequency. */
> if ( hdr->gtsc_khz )
> - d->arch.hvm_domain.gtsc_khz = hdr->gtsc_khz;
> - if ( hvm_gtsc_need_scale(d) )
> + d->arch.tsc_khz = hdr->gtsc_khz;
> + if ( d->arch.vtsc )
> {
> hvm_set_rdtsc_exiting(d, 1);
> gdprintk(XENLOG_WARNING, "Domain %d expects freq %uMHz "
> diff -r d1c245c1c976 xen/arch/x86/hvm/vpt.c
> --- a/xen/arch/x86/hvm/vpt.c Fri May 07 16:03:10 2010 +0100
> +++ b/xen/arch/x86/hvm/vpt.c Fri May 07 19:54:32 2010 +0100
> @@ -32,9 +32,6 @@
> spin_lock_init(&pl->pl_time_lock);
> pl->stime_offset = -(u64)get_s_time();
> pl->last_guest_time = 0;
> -
> - d->arch.hvm_domain.gtsc_khz = cpu_khz;
> - d->arch.hvm_domain.tsc_scaled = 0;
> }
>
> u64 hvm_get_guest_time(struct vcpu *v)
> diff -r d1c245c1c976 xen/arch/x86/time.c
> --- a/xen/arch/x86/time.c Fri May 07 16:03:10 2010 +0100
> +++ b/xen/arch/x86/time.c Fri May 07 19:54:32 2010 +0100
> @@ -828,8 +828,13 @@
>
> if ( d->arch.vtsc )
> {
> - u64 delta = max_t(s64, t->stime_local_stamp -
> d->arch.vtsc_offset, 0);
> - tsc_stamp = scale_delta(delta, &d->arch.ns_to_vtsc);
> + u64 stime = t->stime_local_stamp;
> + if ( is_hvm_domain(d) )
> + {
> + struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
> + stime += pl->stime_offset +
> v->arch.hvm_vcpu.stime_offset; + }
> + tsc_stamp = gtime_to_gtsc(d, stime);
> }
> else
> {
> @@ -852,6 +857,8 @@
> _u.tsc_to_system_mul = t->tsc_scale.mul_frac;
> _u.tsc_shift = (s8)t->tsc_scale.shift;
> }
> + if ( is_hvm_domain(d) )
> + _u.tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset;
>
> /* Don't bother unless timestamp record has changed or we are
> forced. */ _u.version = u->version; /* make versions match for
> memcmp test */ @@ -1618,11 +1625,18 @@
> * PV SoftTSC Emulation.
> */
>
> +u64 gtime_to_gtsc(struct domain *d, u64 tsc)
> +{
> + if ( !is_hvm_domain(d) )
> + tsc = max_t(s64, tsc - d->arch.vtsc_offset, 0);
> + return scale_delta(tsc, &d->arch.ns_to_vtsc);
> +}
> +
> void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int
> rdtscp) {
> s_time_t now = get_s_time();
> struct domain *d = v->domain;
> - u64 delta;
> + u64 tsc;
>
> spin_lock(&d->arch.vtsc_lock);
>
> @@ -1638,8 +1652,7 @@
>
> spin_unlock(&d->arch.vtsc_lock);
>
> - delta = max_t(s64, now - d->arch.vtsc_offset, 0);
> - now = scale_delta(delta, &d->arch.ns_to_vtsc);
> + tsc = gtime_to_gtsc(d, now);
>
> regs->eax = (uint32_t)now;
> regs->edx = (uint32_t)(now >> 32);
> @@ -1780,8 +1793,10 @@
> d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
> d->arch.tsc_khz = gtsc_khz ? gtsc_khz : cpu_khz;
> set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
> - /* use native TSC if initial host has safe TSC and not
> migrated yet */
> - if ( host_tsc_is_safe() && incarnation == 0 )
> + /* use native TSC if initial host has safe TSC, has not
> migrated + * yet and tsc_khz == cpu_khz */
> + if ( host_tsc_is_safe() && incarnation == 0 &&
> + d->arch.tsc_khz == cpu_khz )
> d->arch.vtsc = 0;
> else
> d->arch.ns_to_vtsc =
> scale_reciprocal(d->arch.vtsc_to_ns); @@ -1806,7 +1821,7 @@
> }
> d->arch.incarnation = incarnation + 1;
> if ( is_hvm_domain(d) )
> - hvm_set_rdtsc_exiting(d, d->arch.vtsc ||
> hvm_gtsc_need_scale(d)); + hvm_set_rdtsc_exiting(d,
> d->arch.vtsc); }
>
> /* vtsc may incur measurable performance degradation, diagnose with
> this */ diff -r d1c245c1c976 xen/common/kernel.c
> --- a/xen/common/kernel.c Fri May 07 16:03:10 2010 +0100
> +++ b/xen/common/kernel.c Fri May 07 19:54:32 2010 +0100
> @@ -244,7 +244,8 @@
> (1U << XENFEAT_highmem_assist) |
> (1U << XENFEAT_gnttab_map_avail_bits);
> else
> - fi.submap |= (1U << XENFEAT_hvm_callback_vector);
> + fi.submap |= (1U << XENFEAT_hvm_callback_vector) |
> + (1U << XENFEAT_hvm_safe_pvclock);
> #endif
> break;
> default:
> diff -r d1c245c1c976 xen/include/asm-x86/hvm/domain.h
> --- a/xen/include/asm-x86/hvm/domain.h Fri May 07 16:03:10 2010 +0100
> +++ b/xen/include/asm-x86/hvm/domain.h Fri May 07 19:54:32 2010 +0100
> @@ -45,8 +45,6 @@
> struct hvm_ioreq_page ioreq;
> struct hvm_ioreq_page buf_ioreq;
>
> - uint32_t gtsc_khz; /* kHz */
> - bool_t tsc_scaled;
> struct pl_time pl_time;
>
> struct hvm_io_handler io_handler;
> diff -r d1c245c1c976 xen/include/asm-x86/hvm/hvm.h
> --- a/xen/include/asm-x86/hvm/hvm.h Fri May 07 16:03:10 2010 +0100
> +++ b/xen/include/asm-x86/hvm/hvm.h Fri May 07 19:54:32 2010 +0100
> @@ -295,7 +295,6 @@
> uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2);
>
> void hvm_set_rdtsc_exiting(struct domain *d, bool_t enable);
> -int hvm_gtsc_need_scale(struct domain *d);
>
> static inline int
> hvm_cpu_prepare(unsigned int cpu)
> diff -r d1c245c1c976 xen/include/asm-x86/time.h
> --- a/xen/include/asm-x86/time.h Fri May 07 16:03:10 2010 +0100
> +++ b/xen/include/asm-x86/time.h Fri May 07 19:54:32 2010 +0100
> @@ -60,6 +60,7 @@
> uint64_t ns_to_acpi_pm_tick(uint64_t ns);
>
> void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int
> rdtscp); +u64 gtime_to_gtsc(struct domain *d, u64 tsc);
>
> void tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t
> elapsed_nsec, uint32_t gtsc_khz, uint32_t
> incarnation);
> diff -r d1c245c1c976 xen/include/public/features.h
> --- a/xen/include/public/features.h Fri May 07 16:03:10 2010 +0100
> +++ b/xen/include/public/features.h Fri May 07 19:54:32 2010 +0100
> @@ -71,6 +71,9 @@
> /* x86: Does this Xen host support the HVM callback vector type? */
> #define XENFEAT_hvm_callback_vector 8
>
> +/* x86: pvclock algorithm is safe to use on HVM */
> +#define XENFEAT_hvm_safe_pvclock 9
> +
> #define XENFEAT_NR_SUBMAPS 1
>
> #endif /* __XEN_PUBLIC_FEATURES_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|