This patch isn't done yet, but it contains the core code for
implementing the mechanism I've been proposing for
handling "tsc_mode" (to replace tsc_native/vtsc),
so I thought I'd ask for some feedback, hopefully before
reviewers leave for Xen Summit Asia.
The tsc_mode is set in the VM config file to one of the following values:
#define TSC_MODE_DEFAULT 0
#define TSC_MODE_ALWAYS_EMULATE 1
#define TSC_MODE_NEVER_EMULATE 2
#define TSC_MODE_PVRDTSCP 3
0 = guest rdtsc/p executed natively when monotonicity can be guaranteed
and emulated otherwise (with frequency scaled if necessary)
1 = guest rdtsc/p always emulated at 1GHz (kernel and user)
2 = guest rdtsc always executed natively (no monotonicity/frequency
guarantees); guest rdtscp emulated at native frequency if
unsupported by h/w, else executed natively
3 = same as 2, except xen manages TSC_AUX register so guest can
determine when a restore/migration has occurred and assumes
guest obtains/uses pvclock-like mechanism to adjust for
monotonicity and frequency changes
Tsc_mode must be persistent across save/restore/migration.
In addition, an offset, a tsc_khz, and an "incarnation" counter
are deduced on creation, and then must be persistent across
save/restore/migrate (though some are ignored for some tsc modes).
A key improvement over the previous tsc_native implementation
is that if TSC is "safe", the default tsc_mode does not
use emulation until after the first save/restore/migrate
(mimicking the previous implementation by Intel for HVM).
Since a surprising number of machines are now "TSC safe"
and since most domains never get saved/migrated, the
vast majority of VMs will never suffer the performance
penalty of emulated TSC even though TSC correctness for
applications is still provided.
Note that I haven't removed the tsc_native code yet,
that some functionality is still underway,
and that there's still a fair amount of debug code to be
removed.
Thanks for any feedback!
Dan
diff -r 494ad84ad38c tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Mon Nov 09 22:41:23 2009 +0000
+++ b/tools/libxc/xc_domain.c Thu Nov 12 16:48:53 2009 -0700
@@ -483,6 +483,52 @@ int xc_domain_disable_migrate(int xc_han
domctl.u.disable_migrate.disable = 1;
return do_domctl(xc_handle, &domctl);
}
+
+/*
+ * Hand a domain's TSC parameters to the hypervisor with
+ * XEN_DOMCTL_settscinfo.  The four values are passed through unchanged
+ * in the domctl's "info" member.  Returns the result of do_domctl().
+ */
+int xc_domain_set_tsc_info(int xc_handle,
+                           uint32_t domid,
+                           uint32_t tsc_mode,
+                           uint64_t elapsed_nsec,
+                           uint32_t gtsc_khz,
+                           uint32_t incarnation)
+{
+    DECLARE_DOMCTL;
+    xen_guest_tsc_info_t *tsc = &domctl.u.tsc_info.info;
+    int ret;
+
+    /* Payload first ... */
+    tsc->tsc_mode = tsc_mode;
+    tsc->elapsed_nsec = elapsed_nsec;
+    tsc->gtsc_khz = gtsc_khz;
+    tsc->incarnation = incarnation;
+
+    /* ... then the command header. */
+    domctl.domain = (domid_t)domid;
+    domctl.cmd = XEN_DOMCTL_settscinfo;
+
+    ret = do_domctl(xc_handle, &domctl);
+    return ret;
+}
+
+/*
+ * Query a domain's TSC parameters with XEN_DOMCTL_gettscinfo.
+ *
+ * The hypervisor writes its answer through the out_info guest handle
+ * into a local, zero-initialised buffer that is page-locked for the
+ * duration of the call.  When the domctl returns 0 the four output
+ * pointers are filled in; on any failure they are left untouched and
+ * the non-zero result (from lock_pages() or do_domctl()) is returned.
+ */
+int xc_domain_get_tsc_info(int xc_handle,
+                           uint32_t domid,
+                           uint32_t *tsc_mode,
+                           uint64_t *elapsed_nsec,
+                           uint32_t *gtsc_khz,
+                           uint32_t *incarnation)
+{
+    DECLARE_DOMCTL;
+    xen_guest_tsc_info_t tsc = { 0 };
+    int ret;
+
+    domctl.cmd = XEN_DOMCTL_gettscinfo;
+    domctl.domain = (domid_t)domid;
+    set_xen_guest_handle(domctl.u.tsc_info.out_info, &tsc);
+
+    ret = lock_pages(&tsc, sizeof(tsc));
+    if ( ret != 0 )
+        return ret;
+
+    ret = do_domctl(xc_handle, &domctl);
+    unlock_pages(&tsc, sizeof(tsc));
+    if ( ret != 0 )
+        return ret;
+
+    *tsc_mode = tsc.tsc_mode;
+    *elapsed_nsec = tsc.elapsed_nsec;
+    *gtsc_khz = tsc.gtsc_khz;
+    *incarnation = tsc.incarnation;
+    return 0;
+}
+
int xc_domain_memory_increase_reservation(int xc_handle,
uint32_t domid,
diff -r 494ad84ad38c tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c Mon Nov 09 22:41:23 2009 +0000
+++ b/tools/libxc/xc_domain_restore.c Thu Nov 12 16:48:53 2009 -0700
@@ -1083,6 +1083,23 @@ static int pagebuf_get_one(pagebuf_t* bu
ERROR("error reading/restoring tmem extra");
return -1;
}
+ return pagebuf_get_one(buf, fd, xch, dom);
+    } else if ( count == -7 ) {
+        /*
+         * Chunk type -7: TSC info record.  Fields are read in the order
+         * xc_domain_save writes them (mode, elapsed ns, khz, incarnation)
+         * and applied to the domain straight away.
+         */
+        uint32_t tsc_mode, khz, incarn;
+        uint64_t nsec;
+        if ( read_exact(fd, &tsc_mode, sizeof(uint32_t)) ||
+             read_exact(fd, &nsec, sizeof(uint64_t)) ||
+             read_exact(fd, &khz, sizeof(uint32_t)) ||
+             read_exact(fd, &incarn, sizeof(uint32_t)) ||
+             xc_domain_set_tsc_info(xch, dom, tsc_mode, nsec, khz, incarn) ) {
+            /* Was a copy of the tmem message; name the record that failed. */
+            ERROR("error reading/restoring tsc info");
+            return -1;
+        }
+        /* no PRIxxx formatting allowed here???
+        DPRINTF("tsc_info read: mode=%"PRIu32",ns=0x%"PRIx64","
+                "khz=%"PRIu32",incarn=%"PRIu32"\n",
+                tsc_mode, nsec, khz, incarn);
+        */
return pagebuf_get_one(buf, fd, xch, dom);
} else if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
ERROR("Max batch size exceeded (%d). Giving up.", count);
diff -r 494ad84ad38c tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c Mon Nov 09 22:41:23 2009 +0000
+++ b/tools/libxc/xc_domain_save.c Thu Nov 12 16:48:53 2009 -0700
@@ -1099,6 +1099,28 @@ int xc_domain_save(int xc_handle, int io
ERROR("Error when writing to state file (tmem)");
goto out;
}
+
+ {
+ int marker = -7;
+ uint32_t tsc_mode, khz, incarn;
+ uint64_t nsec;
+ if ( xc_domain_get_tsc_info(xc_handle, dom, &tsc_mode,
+ &nsec, &khz, &incarn) < 0 ||
+ write_exact(io_fd, &marker, sizeof(marker)) ||
+ write_exact(io_fd, &tsc_mode, sizeof(tsc_mode)) ||
+ write_exact(io_fd, &nsec, sizeof(nsec)) ||
+ write_exact(io_fd, &khz, sizeof(khz)) ||
+ write_exact(io_fd, &incarn, sizeof(incarn)) )
+ {
+ ERROR("Error when writing to state file (tsc)");
+ goto out;
+ }
+ /* no PRIxxx formatting?
+ DPRINTK("tsc_info written: mode=%"PRIu32",ns=0x%"PRIx64","
+ "khz=%"PRIu32",incarn=%"PRIu32"\n",
+ tsc_mode, nsec, khz, incarn);
+ */
+ }
copypages:
#define write_exact(fd, buf, len) write_buffer(last_iter, &ob, (fd), (buf),
(len))
@@ -1707,6 +1729,7 @@ int xc_domain_save(int xc_handle, int io
PERROR("Error when writing to state file (2)");
goto out;
}
+
}
/*
diff -r 494ad84ad38c tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Mon Nov 09 22:41:23 2009 +0000
+++ b/tools/libxc/xenctrl.h Thu Nov 12 16:48:53 2009 -0700
@@ -627,6 +627,20 @@ int xc_domain_set_time_offset(int xc_han
int xc_domain_set_time_offset(int xc_handle,
uint32_t domid,
int32_t time_offset_seconds);
+
+int xc_domain_set_tsc_info(int xc_handle,
+ uint32_t domid,
+ uint32_t tsc_mode,
+ uint64_t elapsed_nsec,
+ uint32_t gtsc_khz,
+ uint32_t incarnation);
+
+int xc_domain_get_tsc_info(int xc_handle,
+ uint32_t domid,
+ uint32_t *tsc_mode,
+ uint64_t *elapsed_nsec,
+ uint32_t *gtsc_khz,
+ uint32_t *incarnation);
int xc_domain_set_tsc_native(int xc_handle, uint32_t domid, int is_native);
diff -r 494ad84ad38c tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Nov 09 22:41:23 2009 +0000
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Nov 12 16:48:53 2009 -0700
@@ -1479,6 +1479,20 @@ static PyObject *pyxc_domain_set_tsc_nat
return NULL;
if (xc_domain_set_tsc_native(self->xc_handle, dom, is_native) != 0)
+ return pyxc_error_to_exception();
+
+ Py_INCREF(zero);
+ return zero;
+}
+
+static PyObject *pyxc_domain_set_tsc_mode(XcObject *self, PyObject *args)
+{
+ uint32_t dom, tsc_mode;
+
+ if (!PyArg_ParseTuple(args, "ii", &dom, &tsc_mode))
+ return NULL;
+
+ if (xc_domain_set_tsc_info(self->xc_handle, dom, tsc_mode, 0, 0, 0) != 0)
return pyxc_error_to_exception();
Py_INCREF(zero);
@@ -2029,6 +2043,15 @@ static PyMethodDef pyxc_methods[] = {
" is_native [int]: 1=native, 0=emulate.\n"
"Returns: [int] 0 on success; -1 on error.\n" },
+ { "domain_set_tsc_mode",
+ (PyCFunction)pyxc_domain_set_tsc_mode,
+ METH_VARARGS, "\n"
+ "Set a domain's TSC mode\n"
+ " dom [int]: Domain whose TSC mode is being set.\n"
+ " tsc_mode [int]: 0=default (monotonic, but native where possible)\n"
+ " 1=always emulate 2=never emulate 3=pvrdtscp\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
{ "domain_disable_migrate",
(PyCFunction)pyxc_domain_disable_migrate,
METH_VARARGS, "\n"
diff -r 494ad84ad38c tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Mon Nov 09 22:41:23 2009 +0000
+++ b/tools/python/xen/xend/XendConfig.py Thu Nov 12 16:48:53 2009 -0700
@@ -164,6 +164,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
'vnclisten': str,
'timer_mode': int,
'tsc_native': int,
+ 'tsc_mode': int,
'vpt_align': int,
'viridian': int,
'vncpasswd': str,
@@ -479,6 +480,9 @@ class XendConfig(dict):
if 'tsc_native' not in self['platform']:
self['platform']['tsc_native'] = 0
+
+ if 'tsc_mode' not in self['platform']:
+ self['platform']['tsc_mode'] = 0
if 'nomigrate' not in self['platform']:
self['platform']['nomigrate'] = 0
diff -r 494ad84ad38c tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Mon Nov 09 22:41:23 2009 +0000
+++ b/tools/python/xen/xend/XendDomainInfo.py Thu Nov 12 16:48:53 2009 -0700
@@ -2467,6 +2467,11 @@ class XendDomainInfo:
if arch.type == "x86" and tsc_native is not None:
xc.domain_set_tsc_native(self.domid, int(tsc_native))
+        # Set TSC mode of domain.  This must go through the tsc_mode
+        # binding (xc.domain_set_tsc_mode); passing tsc_mode to
+        # domain_set_tsc_native would hand a mode number (0..3) to a call
+        # that expects the is_native flag.
+        tsc_mode = self.info["platform"].get("tsc_mode")
+        if arch.type == "x86" and tsc_mode is not None:
+            xc.domain_set_tsc_mode(self.domid, int(tsc_mode))
+
# Set timer configuration of domain
timer_mode = self.info["platform"].get("timer_mode")
if hvm and timer_mode is not None:
diff -r 494ad84ad38c tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Mon Nov 09 22:41:23 2009 +0000
+++ b/tools/python/xen/xm/create.py Thu Nov 12 16:48:53 2009 -0700
@@ -224,6 +224,10 @@ gopts.var('tsc_native', val='TSC_NATIVE'
gopts.var('tsc_native', val='TSC_NATIVE',
fn=set_int, default=0,
use="""TSC mode (0=emulate TSC, 1=native TSC).""")
+
+gopts.var('tsc_mode', val='TSC_MODE',
+ fn=set_int, default=0,
+ use="""TSC mode (0=default, 1=always emulate, 2=never emulate,
3=pvrdtscp).""")
gopts.var('nomigrate', val='NOMIGRATE',
fn=set_int, default=0,
@@ -741,6 +745,10 @@ def configure_image(vals):
if vals.tsc_native is not None:
config_image.append(['tsc_native', vals.tsc_native])
+ # DJM DJM don't think I need this???
+ #if vals.tsc_mode is not None:
+ # config_image.append(['tsc_mode', vals.tsc_mode])
+
if vals.nomigrate is not None:
config_image.append(['nomigrate', vals.nomigrate])
@@ -1027,7 +1035,7 @@ def make_config(vals):
config.append([n, v])
map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory',
- 'restart', 'on_poweroff', 'tsc_native', 'nomigrate',
+ 'restart', 'on_poweroff', 'tsc_native', 'tsc_mode',
'nomigrate',
'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail', 'features',
'on_xend_start', 'on_xend_stop', 'target', 'cpuid',
'cpuid_check', 'machine_address_size',
'suppress_spurious_page_faults'])
diff -r 494ad84ad38c tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py Mon Nov 09 22:41:23 2009 +0000
+++ b/tools/python/xen/xm/xenapi_create.py Thu Nov 12 16:48:53 2009 -0700
@@ -1078,6 +1078,7 @@ class sxp2xml:
'pci_power_mgmt',
'xen_platform_pci',
- 'tsc_native'
+ 'tsc_native',
+ 'tsc_mode',
'description',
'nomigrate'
]
diff -r 494ad84ad38c xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Mon Nov 09 22:41:23 2009 +0000
+++ b/xen/arch/x86/domain.c Thu Nov 12 16:48:53 2009 -0700
@@ -520,6 +520,9 @@ int arch_domain_create(struct domain *d,
d->arch.cpuids[i].input[1] = XEN_CPUID_INPUT_UNUSED;
}
+ /* initialize default tsc behavior in case tools don't */
+if (d->domain_id) //DJM
+ tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0);
spin_lock_init(&d->arch.vtsc_lock);
return 0;
diff -r 494ad84ad38c xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Mon Nov 09 22:41:23 2009 +0000
+++ b/xen/arch/x86/domctl.c Thu Nov 12 16:48:53 2009 -0700
@@ -1111,9 +1111,65 @@ long arch_do_domctl(
break;
domain_pause(d);
- d->arch.vtsc = !domctl->u.set_tsc_native.is_native;
- if ( is_hvm_domain(d) )
- hvm_set_rdtsc_exiting(d, d->arch.vtsc || hvm_gtsc_need_scale(d));
+printk("DJM DJM ignoring set_tsc_native for now\n");
+ //d->arch.vtsc = !domctl->u.set_tsc_native.is_native;
+ //if ( is_hvm_domain(d) )
+ //hvm_set_rdtsc_exiting(d, d->arch.vtsc || hvm_gtsc_need_scale(d));
+ domain_unpause(d);
+
+ rcu_unlock_domain(d);
+ ret = 0;
+ }
+ break;
+
+    /*
+     * XEN_DOMCTL_gettscinfo: snapshot the target domain's TSC state (while
+     * it is paused) and copy it out through the caller-supplied out_info
+     * handle.  ret is -ESRCH if the domain does not exist, -EFAULT if the
+     * copy-out fails, 0 otherwise.
+     */
+    case XEN_DOMCTL_gettscinfo:
+    {
+        struct domain *d;
+        /*
+         * Zero-initialised: the whole structure is copied out below,
+         * including the pad field, which tsc_get_info() never writes.
+         * Without this, stale hypervisor stack bytes would be exposed.
+         */
+        xen_guest_tsc_info_t info = { 0 };
+
+        ret = -ESRCH;
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d == NULL )
+            break;
+
+        domain_pause(d);
+        tsc_get_info(d, &info.tsc_mode,
+                        &info.elapsed_nsec,
+                        &info.gtsc_khz,
+                        &info.incarnation);
+        if ( copy_to_guest(domctl->u.tsc_info.out_info, &info, 1) )
+        {
+            printk("XEN_DOMCTL_gettscinfo: copy failed, out_info=%p\n",
+                   domctl->u.tsc_info.out_info.p);
+            ret = -EFAULT;
+        }
+        else
+            ret = 0;
+        domain_unpause(d);
+
+        rcu_unlock_domain(d);
+    }
+    break;
+
+ case XEN_DOMCTL_settscinfo:
+ {
+ struct domain *d;
+
+ ret = -ESRCH;
+ d = rcu_lock_domain_by_id(domctl->domain);
+ if ( d == NULL )
+ break;
+
+ domain_pause(d);
+ tsc_set_info(d, domctl->u.tsc_info.info.tsc_mode,
+ domctl->u.tsc_info.info.elapsed_nsec,
+ domctl->u.tsc_info.info.gtsc_khz,
+ domctl->u.tsc_info.info.incarnation);
domain_unpause(d);
rcu_unlock_domain(d);
diff -r 494ad84ad38c xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Mon Nov 09 22:41:23 2009 +0000
+++ b/xen/arch/x86/time.c Thu Nov 12 16:48:53 2009 -0700
@@ -818,6 +818,7 @@ static void __update_vcpu_system_time(st
struct cpu_time *t;
struct vcpu_time_info *u, _u;
XEN_GUEST_HANDLE(vcpu_time_info_t) user_u;
+ s_time_t vtsc_stamp = 0;
if ( v->vcpu_info == NULL )
return;
@@ -825,9 +826,15 @@ static void __update_vcpu_system_time(st
t = &this_cpu(cpu_time);
u = &vcpu_info(v, time);
+ if ( v->domain->arch.vtsc )
+ /* FIXME: need scaling here too? */
+ vtsc_stamp = t->stime_local_stamp - v->domain->arch.vtsc_offset;
+ else if ( v->domain->arch.pvrdtscp )
+ /* FIXME: write tsc_aux here? */;
+
/* Don't bother unless timestamps have changed or we are forced. */
if ( !force && (u->tsc_timestamp == (v->domain->arch.vtsc
- ? t->stime_local_stamp
+ ? vtsc_stamp
: t->local_tsc_stamp)) )
return;
@@ -835,8 +842,8 @@ static void __update_vcpu_system_time(st
if ( v->domain->arch.vtsc )
{
- _u.tsc_timestamp = t->stime_local_stamp;
- _u.system_time = t->stime_local_stamp;
+ _u.tsc_timestamp = vtsc_stamp;
+ _u.system_time = vtsc_stamp;
_u.tsc_to_system_mul = 0x80000000u;
_u.tsc_shift = 1;
}
@@ -1598,8 +1605,126 @@ void pv_soft_rdtsc(struct vcpu *v, struc
spin_unlock(&v->domain->arch.vtsc_lock);
+ now -= v->domain->arch.vtsc_offset;
+ if ( v->domain->arch.vtsc_shift != 1 ||
+ v->domain->arch.vtsc_mul_frac != 0x80000000u )
+ {
+/* FIXME
+ struct time_scale scale;
+ scale.shift = v->domain->arch.vtsc_shift;
+ scale.mul_frac = v->domain->arch.vtsc_mul_frac;
+ now = scale_delta(now, &scale);
+*/
+ }
+
regs->eax = (uint32_t)now;
regs->edx = (uint32_t)(now >> 32);
+}
+
+/*
+ * Decide whether the host TSC is treated as "safe".
+ *
+ * Returns 1 when any of these holds, checked in this order:
+ *   - the boot CPU advertises X86_FEATURE_TSC_RELIABLE;
+ *   - exactly one CPU is online;
+ *   - the boot CPU advertises X86_FEATURE_CONSTANT_TSC and max_cstate
+ *     is no greater than 2.
+ * Returns 0 otherwise.
+ */
+int host_tsc_is_safe(void)
+{
+    extern unsigned int max_cstate;
+
+    return ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) ) ||
+           ( num_online_cpus() == 1 ) ||
+           ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && max_cstate <= 2 );
+}
+
+/*
+ * Called to collect tsc-related data only for save file or live migrate.
+ *
+ * Outputs, by d->arch.tsc_mode (always stored to *tsc_mode):
+ *   NEVER_EMULATE:  everything else reported as 0.
+ *   ALWAYS_EMULATE: elapsed_nsec = get_s_time() - vtsc_offset,
+ *                   gtsc_khz = 1000000, incarnation = 0.
+ *   DEFAULT:        elapsed_nsec as above, the domain's incarnation,
+ *                   gtsc_khz = cpu_khz if not emulating, else 1000000.
+ *   PVRDTSCP:       elapsed_nsec as above, the domain's incarnation and
+ *                   cached tsc_khz.
+ *   anything else:  everything else reported as 0.
+ */
+void tsc_get_info(struct domain *d, uint32_t *tsc_mode, uint64_t *elapsed_nsec,
+                  uint32_t *gtsc_khz, uint32_t *incarnation)
+{
+    /*
+     * Give every output a defined value up front.  Not every mode fills
+     * every field, and all of them are printed below and handed back to
+     * the caller, so none may be left uninitialised.
+     */
+    *elapsed_nsec = 0;
+    *gtsc_khz = 0;
+    *incarnation = 0;
+
+    switch ( *tsc_mode = d->arch.tsc_mode )
+    {
+    case TSC_MODE_NEVER_EMULATE:
+        /* nothing beyond the mode itself needs to be saved */
+        break;
+    case TSC_MODE_ALWAYS_EMULATE:
+        *elapsed_nsec = get_s_time() - d->arch.vtsc_offset; /* FIXME scale? */
+        *gtsc_khz = 1000000UL;
+        break;
+    case TSC_MODE_DEFAULT:
+        *incarnation = d->arch.incarnation;
+        *elapsed_nsec = get_s_time() - d->arch.vtsc_offset; /* FIXME scale? */
+        *gtsc_khz = ( d->arch.vtsc == 0 ) ? cpu_khz : 1000000UL;
+        break;
+    case TSC_MODE_PVRDTSCP:
+        *incarnation = d->arch.incarnation;
+        *elapsed_nsec = get_s_time() - d->arch.vtsc_offset; /* FIXME scale? */
+        *gtsc_khz = d->arch.tsc_khz;
+        break;
+    default:
+        /* unrecognised mode: report it with zeroed parameters */
+        break;
+    }
+printk("DJM tsc_get_info got: dom%u,mode=%d,ns=0x%lx,khz=%d,inc=%d\n",
+d->domain_id,(int)*tsc_mode,(long)*elapsed_nsec,(int)*gtsc_khz,(int)*incarnation);
+}
+
+/*
+ * Called to set tsc-related data only on restore or target of live migrate
+ * (arch_domain_create also calls it to establish the default mode).
+ *
+ * tsc_mode is stored in d->arch.tsc_mode and selects the behaviour:
+ *   NEVER_EMULATE:  emulation off.
+ *   ALWAYS_EMULATE: emulation on with the 1:1 scale (shift 1,
+ *                   mul_frac 0x80000000) and
+ *                   vtsc_offset = get_s_time() - elapsed_nsec.
+ *   DEFAULT:        native if host_tsc_is_safe() and incarnation == 0;
+ *                   otherwise emulated, scaled from gtsc_khz when that
+ *                   is neither 0 nor 1000000, else with the 1:1 scale.
+ *   PVRDTSCP:       sets pvrdtscp; native with a cached scale when
+ *                   gtsc_khz != 0, otherwise emulated with the 1:1 scale.
+ * For HVM domains rdtsc exiting is then updated to match.
+ */
+void tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec,
+                  uint32_t gtsc_khz, uint32_t incarnation)
+{
+    struct time_scale scale;
+
+printk("DJM tsc_set_info before: dom%u,mode=%d,ns=0x%lx,khz=%d,inc=%d\n",
+d->domain_id,(int)tsc_mode,(long)elapsed_nsec,(int)gtsc_khz,(int)incarnation);
+    switch ( d->arch.tsc_mode = tsc_mode )
+    {
+    case TSC_MODE_NEVER_EMULATE:
+        d->arch.vtsc = 0;
+        break;
+    case TSC_MODE_ALWAYS_EMULATE:
+        d->arch.vtsc = 1;
+        d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
+        d->arch.vtsc_shift = 1;
+        d->arch.vtsc_mul_frac = 0x80000000U;
+        break;
+    case TSC_MODE_DEFAULT:
+        if ( host_tsc_is_safe() && incarnation == 0 )
+        {
+            d->arch.vtsc = 0;
+            d->arch.incarnation = 1;
+            d->arch.tsc_khz = gtsc_khz ? gtsc_khz : cpu_khz;
+            /* d->arch.vtsc_shift/mul_frac/offset will not be used */
+        } else if ( gtsc_khz != 0 && gtsc_khz != 1000000 ) {
+printk("DJM tsc_set_info A: khz=%d\n",gtsc_khz);
+            d->arch.vtsc = 1;
+            /*
+             * Widen before multiplying: in 32 bits, gtsc_khz * 1000 wraps
+             * once gtsc_khz exceeds 4294967 (about 4.29 GHz).
+             * NOTE(review): assumes set_time_scale() takes a 64-bit
+             * ticks-per-second argument -- confirm.
+             */
+            set_time_scale(&scale, (uint64_t)gtsc_khz * 1000 );
+            d->arch.vtsc_shift = scale.shift;
+            d->arch.vtsc_mul_frac = scale.mul_frac;
+            d->arch.vtsc_offset = get_s_time() - elapsed_nsec; /* FIXME? */
+        } else {
+            d->arch.vtsc = 1;
+            d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
+            d->arch.vtsc_shift = 1;
+            d->arch.vtsc_mul_frac = 0x80000000U;
+            d->arch.incarnation = incarnation + 1;
+printk("DJM tsc_set_info B: offset=%ld\n",(long)d->arch.vtsc_offset);
+        }
+        break;
+    case TSC_MODE_PVRDTSCP:
+        /* if (hardware supports rdtscp instruction) FIXME */
+        d->arch.pvrdtscp = 1;
+        d->arch.vtsc = 0;
+        if ( gtsc_khz != 0 ) {
+            /* widened for the same overflow reason as in the default mode */
+            set_time_scale(&scale, (uint64_t)gtsc_khz * 1000 );
+            d->arch.vtsc_shift = scale.shift;
+            d->arch.vtsc_mul_frac = scale.mul_frac;
+        } else {
+            d->arch.vtsc = 1;
+            d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
+            d->arch.vtsc_shift = 1;
+            d->arch.vtsc_mul_frac = 0x80000000U;
+            d->arch.incarnation = incarnation + 1;
+        }
+        break;
+    default:
+        /* unrecognised mode: recorded above, emulation state left as is */
+        break;
+    }
+    if ( is_hvm_domain(d) )
+        hvm_set_rdtsc_exiting(d, d->arch.vtsc || hvm_gtsc_need_scale(d));
+printk("DJM tsc_set_info after: dom%u,vtsc=%d,ofs=0x%lx,sh=%d,mulfrac=0x%08x,inc=%d\n",
+d->domain_id,(int)d->arch.vtsc,(long)d->arch.vtsc_offset,(int)d->arch.vtsc_shift,(int)d->arch.vtsc_mul_frac,(int)d->arch.incarnation);
}
/* vtsc may incur measurable performance degradation, diagnose with this */
@@ -1607,33 +1732,51 @@ static void dump_softtsc(unsigned char k
{
struct domain *d;
int domcnt = 0;
+ extern unsigned int max_cstate;
tsc_check_reliability();
if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
printk("TSC marked as reliable, "
"warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC ) )
- printk("TSC marked as constant but not reliable, "
- "warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
- else
+ {
+ printk("TSC has constant rate, ");
+ if (max_cstate <= 2)
+ printk("no deep Cstates possible, so deemed reliable, ");
+ else
+ printk("deep Cstates possible, so not reliable, ");
+ printk("warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
+ } else
printk("TSC not marked as either constant or reliable, "
"warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
for_each_domain ( d )
{
+ if ( d->domain_id == 0 && d->arch.tsc_mode == TSC_MODE_DEFAULT )
+ continue;
+ printk("dom%u%s: mode=%d",d->domain_id,
+ is_hvm_domain(d) ? "(hvm)" : "", d->arch.tsc_mode);
+ if ( d->arch.vtsc_offset )
+ printk(",ofs=0x%"PRIx64"",d->arch.vtsc_offset);
+ if ( d->arch.tsc_khz )
+ printk(",khz=%"PRIu32"",d->arch.tsc_khz);
+ if ( d->arch.incarnation )
+ printk(",inc=%"PRIu32"",d->arch.incarnation);
if ( !d->arch.vtsc )
+ {
+ printk("\n");
continue;
+ }
if ( is_hvm_domain(d) )
- printk("dom%u (hvm) vtsc count: %"PRIu64" total\n",
- d->domain_id, d->arch.vtsc_kerncount);
+ printk(",vtsc count: %"PRIu64" total\n",
+ d->arch.vtsc_kerncount);
else
- printk("dom%u vtsc count: %"PRIu64" kernel, %"PRIu64" user\n",
- d->domain_id, d->arch.vtsc_kerncount,
- d->arch.vtsc_usercount);
+ printk(",vtsc count: %"PRIu64" kernel, %"PRIu64" user\n",
+ d->arch.vtsc_kerncount, d->arch.vtsc_usercount);
domcnt++;
}
if ( !domcnt )
- printk("All domains have native TSC\n");
+ printk("No domains have emulated TSC\n");
}
static struct keyhandler dump_softtsc_keyhandler = {
diff -r 494ad84ad38c xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Mon Nov 09 22:41:23 2009 +0000
+++ b/xen/include/asm-x86/domain.h Thu Nov 12 16:48:53 2009 -0700
@@ -299,9 +299,17 @@ struct arch_domain
struct domain_mca_msrs vmca_msrs;
/* SoftTSC emulation */
- bool_t vtsc;
- s_time_t vtsc_last;
+ int tsc_mode; /* see include/asm-x86/time.h */
+ bool_t vtsc; /* 1 == enable tsc emulation */
+ bool_t pvrdtscp; /* set TSC_AUX to incarnation on all vcpus */
+ s_time_t vtsc_last; /* previous value (to guarantee monotonicity) */
spinlock_t vtsc_lock;
+ uint64_t vtsc_offset; /* adjustment for save/restore/migrate */
+ uint32_t tsc_khz; /* cached khz for certain emulated cases */
+ uint32_t vtsc_shift; /* cached scaling for certain emulated cases */
+ uint32_t vtsc_mul_frac; /* cached scaling for certain emulated cases */
+ uint32_t incarnation; /* incremented every restore or live migrate
+ (possibly other cases in the future */
uint64_t vtsc_kerncount; /* for hvm, counts all vtsc */
uint64_t vtsc_usercount; /* not used for hvm */
} __cacheline_aligned;
diff -r 494ad84ad38c xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h Mon Nov 09 22:41:23 2009 +0000
+++ b/xen/include/asm-x86/time.h Thu Nov 12 16:48:53 2009 -0700
@@ -3,6 +3,24 @@
#define __X86_TIME_H__
#include <asm/msr.h>
+
+/*
+ * PV TSC emulation modes:
+ * 0 = guest rdtsc/p executed natively when monotonicity can be guaranteed
+ * and emulated otherwise (with frequency scaled if necessary)
+ * 1 = guest rdtsc/p always emulated at 1GHz (kernel and user)
+ * 2 = guest rdtsc always executed natively (no monotonicity/frequency
+ * guarantees); guest rdtscp emulated at native frequency if
+ * unsupported by h/w, else executed natively
+ * 3 = same as 2, except xen manages TSC_AUX register so guest can
+ * determine when a restore/migration has occurred and assumes
+ * guest obtains/uses pvclock-like mechanism to adjust for
+ * monotonicity and frequency changes
+ */
+#define TSC_MODE_DEFAULT 0
+#define TSC_MODE_ALWAYS_EMULATE 1
+#define TSC_MODE_NEVER_EMULATE 2
+#define TSC_MODE_PVRDTSCP 3
void calibrate_tsc_bp(void);
void calibrate_tsc_ap(void);
@@ -43,6 +61,13 @@ uint64_t ns_to_acpi_pm_tick(uint64_t ns)
void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs);
+void tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec,
+ uint32_t gtsc_khz, uint32_t incarnation);
+
+void tsc_get_info(struct domain *d, uint32_t *tsc_mode, uint64_t *elapsed_nsec,
+ uint32_t *gtsc_khz, uint32_t *incarnation);
+
+
void force_update_vcpu_system_time(struct vcpu *v);
#endif /* __X86_TIME_H__ */
diff -r 494ad84ad38c xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Mon Nov 09 22:41:23 2009 +0000
+++ b/xen/include/public/domctl.h Thu Nov 12 16:48:53 2009 -0700
@@ -656,6 +656,22 @@ typedef struct xen_domctl_disable_migrat
} xen_domctl_disable_migrate_t;
+#define XEN_DOMCTL_gettscinfo 59
+#define XEN_DOMCTL_settscinfo 60
+struct xen_guest_tsc_info { /* TSC state exchanged by XEN_DOMCTL_{get,set}tscinfo */
+ uint32_t tsc_mode; /* one of the TSC_MODE_* values */
+ uint32_t gtsc_khz; /* TSC frequency in kHz (tsc_get_info reports cpu_khz, 1000000, the cached tsc_khz, or 0) */
+ uint32_t incarnation; /* counter that tsc_set_info advances on some restore paths */
+ uint32_t pad; /* explicit padding; presumably keeps elapsed_nsec 8-byte aligned -- TODO confirm */
+ uint64_t elapsed_nsec; /* nanoseconds; tsc_get_info computes get_s_time() - vtsc_offset */
+};
+typedef struct xen_guest_tsc_info xen_guest_tsc_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t);
+typedef struct xen_domctl_tsc_info {
+ XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT: written by XEN_DOMCTL_gettscinfo */
+ xen_guest_tsc_info_t info; /* IN: read by XEN_DOMCTL_settscinfo */
+} xen_domctl_tsc_info_t;
+
#define XEN_DOMCTL_gdbsx_guestmemio 1000 /* guest mem io */
struct xen_domctl_gdbsx_memio {
uint64_aligned_t pgd3val;/* optional: init_mm.pgd[3] value */
@@ -707,6 +723,7 @@ struct xen_domctl {
struct xen_domctl_settimeoffset settimeoffset;
struct xen_domctl_set_tsc_native set_tsc_native;
struct xen_domctl_disable_migrate disable_migrate;
+ struct xen_domctl_tsc_info tsc_info;
struct xen_domctl_real_mode_area real_mode_area;
struct xen_domctl_hvmcontext hvmcontext;
struct xen_domctl_hvmcontext_partial hvmcontext_partial;
tscmode-draft.patch
Description: Binary data
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|