xen-devel

RE: [Xen-devel] tsc_mode == 1 and hvm guests

Hi Stefano --

Eyeballing the patch, it looks good to me.  Have you
verified that HVM save/restore/migration works (to a physical
machine with a different TSC rate)?

Dan

> -----Original Message-----
> From: Stefano Stabellini [mailto:stefano.stabellini@xxxxxxxxxxxxx]
> Sent: Friday, May 07, 2010 1:06 PM
> To: Zhang, Xiantao
> Cc: Dan Magenheimer; Keir Fraser; Stefano Stabellini; xen-devel@xxxxxxxxxxxxxxxxxxx; Xu, Dongxiao
> Subject: RE: [Xen-devel] tsc_mode == 1 and hvm guests
> 
> On Fri, 7 May 2010, Zhang, Xiantao wrote:
> >
> > >>>
> > >>> Also there are two gtsc_khz AFAICT: d->arch.tsc_khz and
> > >>> d->arch.hvm_domain.gtsc_khz, why hvm guests have their own separate
> > >>> record of the guest tsc frequency, when they never actually change it?
> >
> > The latter one is only used for save/restore and migration to record
> > the guest's expected TSC frequency.  Thanks!
> > Xiantao
> >
> 
> This is my proposed fix: I am removing the tsc_scaled variable, which is
> never actually used because whenever the TSC needs to be scaled vtsc is 1.
> I am also making this more explicit in tsc_set_info.
> I am removing hvm_domain.gtsc_khz as well, since it duplicates
> d->arch.tsc_khz.
> I am using scale_delta(delta, &d->arch.ns_to_vtsc) to scale the TSC
> value before returning it to the guest, as in the PV case.
> Finally, I added a feature flag to advertise that the pvclock algorithm
> is safe to use in an HVM guest, so that the guest can now use it
> without hanging.
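> 
> As a rough aside, here is a minimal standalone sketch of the
> multiply-and-shift conversion that scale_delta()-style code performs
> when turning elapsed nanoseconds into guest TSC ticks.  The demo_*
> names are made up for the example, and the scale factor is derived
> with floating point purely for brevity (the real hypervisor code
> avoids floating point):
> 
> /* Illustrative only -- not the Xen implementation. */
> #include <stdint.h>
> #include <stdio.h>
> 
> struct demo_scale {            /* hypothetical stand-in for time_scale */
>     int      shift;
>     uint32_t mul_frac;
> };
> 
> /* Precompute guest ticks-per-ns as mul_frac * 2^(shift - 32).
>  * Assumes gtsc_khz != 0. */
> static void demo_set_scale(struct demo_scale *s, uint64_t gtsc_khz)
> {
>     double ratio = (double)(gtsc_khz * 1000) / 1e9;   /* ticks per ns */
> 
>     s->shift = 0;
>     while ( ratio < 0.5 )  { ratio *= 2; s->shift--; }
>     while ( ratio >= 1.0 ) { ratio /= 2; s->shift++; }
>     s->mul_frac = (uint32_t)(ratio * 4294967296.0);   /* ratio * 2^32 */
> }
> 
> /* Elapsed nanoseconds -> guest TSC ticks: a shift and one multiply. */
> static uint64_t demo_scale_delta(uint64_t delta_ns, const struct demo_scale *s)
> {
>     if ( s->shift < 0 )
>         delta_ns >>= -s->shift;
>     else
>         delta_ns <<= s->shift;
>     return (delta_ns * s->mul_frac) >> 32;
> }
> 
> int main(void)
> {
>     struct demo_scale s;
> 
>     demo_set_scale(&s, 2400000);   /* guest expects a 2.4 GHz TSC */
>     /* 1 ms of guest time should read back as ~2,400,000 ticks. */
>     printf("%llu\n", (unsigned long long)demo_scale_delta(1000000, &s));
>     return 0;
> }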
> 
> I think this patch catches all possible cases, but I would like some
> review to be sure.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
> 
> 
> ---
> 
> 
> diff -r d1c245c1c976 xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c  Fri May 07 16:03:10 2010 +0100
> +++ b/xen/arch/x86/hvm/hvm.c  Fri May 07 19:54:32 2010 +0100
> @@ -153,32 +153,6 @@
>          hvm_funcs.set_rdtsc_exiting(v, enable);
>  }
> 
> -int hvm_gtsc_need_scale(struct domain *d)
> -{
> -    uint32_t gtsc_mhz, htsc_mhz;
> -
> -    if ( d->arch.vtsc )
> -        return 0;
> -
> -    gtsc_mhz = d->arch.hvm_domain.gtsc_khz / 1000;
> -    htsc_mhz = (uint32_t)cpu_khz / 1000;
> -
> -    d->arch.hvm_domain.tsc_scaled = (gtsc_mhz && (gtsc_mhz != htsc_mhz));
> -    return d->arch.hvm_domain.tsc_scaled;
> -}
> -
> -static u64 hvm_h2g_scale_tsc(struct vcpu *v, u64 host_tsc)
> -{
> -    uint32_t gtsc_khz, htsc_khz;
> -
> -    if ( !v->domain->arch.hvm_domain.tsc_scaled )
> -        return host_tsc;
> -
> -    htsc_khz = cpu_khz;
> -    gtsc_khz = v->domain->arch.hvm_domain.gtsc_khz;
> -    return muldiv64(host_tsc, gtsc_khz, htsc_khz);
> -}
> -
>  void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
>  {
>      uint64_t tsc;
> @@ -186,11 +160,11 @@
>      if ( v->domain->arch.vtsc )
>      {
>          tsc = hvm_get_guest_time(v);
> +        tsc = gtime_to_gtsc(v->domain, tsc);
>      }
>      else
>      {
>          rdtscll(tsc);
> -        tsc = hvm_h2g_scale_tsc(v, tsc);
>      }
> 
>      v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - tsc;
> @@ -204,12 +178,12 @@
>      if ( v->domain->arch.vtsc )
>      {
>          tsc = hvm_get_guest_time(v);
> +        tsc = gtime_to_gtsc(v->domain, tsc);
>          v->domain->arch.vtsc_kerncount++;
>      }
>      else
>      {
>          rdtscll(tsc);
> -        tsc = hvm_h2g_scale_tsc(v, tsc);
>      }
> 
>      return tsc + v->arch.hvm_vcpu.cache_tsc_offset;
> diff -r d1c245c1c976 xen/arch/x86/hvm/save.c
> --- a/xen/arch/x86/hvm/save.c Fri May 07 16:03:10 2010 +0100
> +++ b/xen/arch/x86/hvm/save.c Fri May 07 19:54:32 2010 +0100
> @@ -33,7 +33,7 @@
>      hdr->cpuid = eax;
> 
>      /* Save guest's preferred TSC. */
> -    hdr->gtsc_khz = d->arch.hvm_domain.gtsc_khz;
> +    hdr->gtsc_khz = d->arch.tsc_khz;
>  }
> 
>  int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
> @@ -62,8 +62,8 @@
> 
>      /* Restore guest's preferred TSC frequency. */
>      if ( hdr->gtsc_khz )
> -        d->arch.hvm_domain.gtsc_khz = hdr->gtsc_khz;
> -    if ( hvm_gtsc_need_scale(d) )
> +        d->arch.tsc_khz = hdr->gtsc_khz;
> +    if ( d->arch.vtsc )
>      {
>          hvm_set_rdtsc_exiting(d, 1);
>          gdprintk(XENLOG_WARNING, "Domain %d expects freq %uMHz "
> diff -r d1c245c1c976 xen/arch/x86/hvm/vpt.c
> --- a/xen/arch/x86/hvm/vpt.c  Fri May 07 16:03:10 2010 +0100
> +++ b/xen/arch/x86/hvm/vpt.c  Fri May 07 19:54:32 2010 +0100
> @@ -32,9 +32,6 @@
>      spin_lock_init(&pl->pl_time_lock);
>      pl->stime_offset = -(u64)get_s_time();
>      pl->last_guest_time = 0;
> -
> -    d->arch.hvm_domain.gtsc_khz = cpu_khz;
> -    d->arch.hvm_domain.tsc_scaled = 0;
>  }
> 
>  u64 hvm_get_guest_time(struct vcpu *v)
> diff -r d1c245c1c976 xen/arch/x86/time.c
> --- a/xen/arch/x86/time.c     Fri May 07 16:03:10 2010 +0100
> +++ b/xen/arch/x86/time.c     Fri May 07 19:54:32 2010 +0100
> @@ -828,8 +828,13 @@
> 
>      if ( d->arch.vtsc )
>      {
> -        u64 delta = max_t(s64, t->stime_local_stamp - d->arch.vtsc_offset, 0);
> -        tsc_stamp = scale_delta(delta, &d->arch.ns_to_vtsc);
> +        u64 stime = t->stime_local_stamp;
> +        if ( is_hvm_domain(d) )
> +        {
> +            struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
> +            stime += pl->stime_offset + v->arch.hvm_vcpu.stime_offset;
> +        }
> +        tsc_stamp = gtime_to_gtsc(d, stime);
>      }
>      else
>      {
> @@ -852,6 +857,8 @@
>          _u.tsc_to_system_mul = t->tsc_scale.mul_frac;
>          _u.tsc_shift         = (s8)t->tsc_scale.shift;
>      }
> +    if ( is_hvm_domain(d) )
> +        _u.tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset;
> 
>      /* Don't bother unless timestamp record has changed or we are forced. */
>      _u.version = u->version; /* make versions match for memcmp test */
> @@ -1618,11 +1625,18 @@
>   * PV SoftTSC Emulation.
>   */
> 
> +u64 gtime_to_gtsc(struct domain *d, u64 tsc)
> +{
> +    if ( !is_hvm_domain(d) )
> +        tsc = max_t(s64, tsc - d->arch.vtsc_offset, 0);
> +    return scale_delta(tsc, &d->arch.ns_to_vtsc);
> +}
> +
>  void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp)
>  {
>      s_time_t now = get_s_time();
>      struct domain *d = v->domain;
> -    u64 delta;
> +    u64 tsc;
> 
>      spin_lock(&d->arch.vtsc_lock);
> 
> @@ -1638,8 +1652,7 @@
> 
>      spin_unlock(&d->arch.vtsc_lock);
> 
> -    delta = max_t(s64, now - d->arch.vtsc_offset, 0);
> -    now = scale_delta(delta, &d->arch.ns_to_vtsc);
> +    tsc = gtime_to_gtsc(d, now);
> 
> -    regs->eax = (uint32_t)now;
> -    regs->edx = (uint32_t)(now >> 32);
> +    regs->eax = (uint32_t)tsc;
> +    regs->edx = (uint32_t)(tsc >> 32);
> @@ -1780,8 +1793,10 @@
>          d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
>          d->arch.tsc_khz = gtsc_khz ? gtsc_khz : cpu_khz;
>          set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
> -        /* use native TSC if initial host has safe TSC and not migrated yet */
> -        if ( host_tsc_is_safe() && incarnation == 0 )
> +        /* use native TSC if initial host has safe TSC, has not migrated
> +         * yet and tsc_khz == cpu_khz */
> +        if ( host_tsc_is_safe() && incarnation == 0 &&
> +                d->arch.tsc_khz == cpu_khz )
>              d->arch.vtsc = 0;
>          else
>              d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns);
> @@ -1806,7 +1821,7 @@
>      }
>      d->arch.incarnation = incarnation + 1;
>      if ( is_hvm_domain(d) )
> -        hvm_set_rdtsc_exiting(d, d->arch.vtsc || hvm_gtsc_need_scale(d));
> +        hvm_set_rdtsc_exiting(d, d->arch.vtsc);
>  }
> 
>  /* vtsc may incur measurable performance degradation, diagnose with this */
> diff -r d1c245c1c976 xen/common/kernel.c
> --- a/xen/common/kernel.c     Fri May 07 16:03:10 2010 +0100
> +++ b/xen/common/kernel.c     Fri May 07 19:54:32 2010 +0100
> @@ -244,7 +244,8 @@
>                               (1U << XENFEAT_highmem_assist) |
>                               (1U << XENFEAT_gnttab_map_avail_bits);
>              else
> -                fi.submap |= (1U << XENFEAT_hvm_callback_vector);
> +                fi.submap |= (1U << XENFEAT_hvm_callback_vector) |
> +                             (1U << XENFEAT_hvm_safe_pvclock);
>  #endif
>              break;
>          default:
> diff -r d1c245c1c976 xen/include/asm-x86/hvm/domain.h
> --- a/xen/include/asm-x86/hvm/domain.h        Fri May 07 16:03:10 2010 +0100
> +++ b/xen/include/asm-x86/hvm/domain.h        Fri May 07 19:54:32 2010 +0100
> @@ -45,8 +45,6 @@
>      struct hvm_ioreq_page  ioreq;
>      struct hvm_ioreq_page  buf_ioreq;
> 
> -    uint32_t               gtsc_khz; /* kHz */
> -    bool_t                 tsc_scaled;
>      struct pl_time         pl_time;
> 
>      struct hvm_io_handler  io_handler;
> diff -r d1c245c1c976 xen/include/asm-x86/hvm/hvm.h
> --- a/xen/include/asm-x86/hvm/hvm.h   Fri May 07 16:03:10 2010 +0100
> +++ b/xen/include/asm-x86/hvm/hvm.h   Fri May 07 19:54:32 2010 +0100
> @@ -295,7 +295,6 @@
>  uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2);
> 
>  void hvm_set_rdtsc_exiting(struct domain *d, bool_t enable);
> -int hvm_gtsc_need_scale(struct domain *d);
> 
>  static inline int
>  hvm_cpu_prepare(unsigned int cpu)
> diff -r d1c245c1c976 xen/include/asm-x86/time.h
> --- a/xen/include/asm-x86/time.h      Fri May 07 16:03:10 2010 +0100
> +++ b/xen/include/asm-x86/time.h      Fri May 07 19:54:32 2010 +0100
> @@ -60,6 +60,7 @@
>  uint64_t ns_to_acpi_pm_tick(uint64_t ns);
> 
>  void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp);
> +u64 gtime_to_gtsc(struct domain *d, u64 tsc);
> 
>  void tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec,
>                    uint32_t gtsc_khz, uint32_t incarnation);
> diff -r d1c245c1c976 xen/include/public/features.h
> --- a/xen/include/public/features.h   Fri May 07 16:03:10 2010 +0100
> +++ b/xen/include/public/features.h   Fri May 07 19:54:32 2010 +0100
> @@ -71,6 +71,9 @@
>  /* x86: Does this Xen host support the HVM callback vector type? */
>  #define XENFEAT_hvm_callback_vector        8
> 
> +/* x86: pvclock algorithm is safe to use on HVM */
> +#define XENFEAT_hvm_safe_pvclock           9
> +
>  #define XENFEAT_NR_SUBMAPS 1
> 
>  #endif /* __XEN_PUBLIC_FEATURES_H__ */
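> 
> For reference, an HVM guest would pick up the new flag the same way it
> reads the other feature bits, by querying submap 0 of XENVER_get_features
> before deciding to trust pvclock.  A minimal sketch follows; the header
> paths and the hypercall wrapper name are guest-OS specific and only
> assumed here:
> 
> #include <stdint.h>
> #include <xen/interface/version.h>   /* XENVER_get_features, xen_feature_info */
> #include <xen/interface/features.h>  /* XENFEAT_hvm_safe_pvclock */
> 
> /* Assumed to be provided by the guest's hypercall layer. */
> extern int HYPERVISOR_xen_version(int cmd, void *arg);
> 
> static int hvm_pvclock_is_safe(void)
> {
>     struct xen_feature_info fi = { .submap_idx = 0 };
> 
>     if ( HYPERVISOR_xen_version(XENVER_get_features, &fi) != 0 )
>         return 0;                     /* be conservative on error */
>     return !!(fi.submap & (1U << XENFEAT_hvm_safe_pvclock));
> }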

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel