Linux, under CONFIG_SECCOMP, has been capable of hiding the TSC from
processes for quite a while. This patch makes that actually work for
pv kernels by allowing them to control CR4.TSD (and, since it is simple
to do at the same time, CR4.DE).
Applies cleanly only on top of the previously submitted debug register
handling patch.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: 2007-10-10/xen/arch/x86/acpi/power.c
===================================================================
--- 2007-10-10.orig/xen/arch/x86/acpi/power.c   2007-10-29 11:19:58.000000000 +0100
+++ 2007-10-10/xen/arch/x86/acpi/power.c        2007-10-26 15:08:38.000000000 +0200
@@ -155,6 +155,8 @@ static int enter_state(u32 state)
 
     pmprintk(XENLOG_DEBUG, "Back to C.");
 
+    write_cr4(idle_vcpu[smp_processor_id()]->arch.cr4);
+
     device_power_up();
 
     pmprintk(XENLOG_INFO, "Finishing wakeup from ACPI S%d state.", state);
Index: 2007-10-10/xen/arch/x86/domain.c
===================================================================
--- 2007-10-10.orig/xen/arch/x86/domain.c       2007-10-26 16:47:52.000000000 +0200
+++ 2007-10-10/xen/arch/x86/domain.c    2007-10-29 11:20:35.000000000 +0100
@@ -413,6 +413,8 @@ int vcpu_initialise(struct vcpu *v)
             v->arch.schedule_tail = continue_idle_domain;
             v->arch.cr3           = __pa(idle_pg_table);
         }
+        else
+            v->arch.cr4 = mmu_cr4_features;
     }
 
     v->arch.perdomain_ptes =
@@ -1195,6 +1197,12 @@ static void paravirt_ctxt_switch_to(stru
     set_int80_direct_trap(v);
     switch_kernel_stack(v);
 
+    if ( unlikely(idle_vcpu[v->processor]->arch.cr4 != v->arch.cr4) )
+    {
+        idle_vcpu[v->processor]->arch.cr4 = v->arch.cr4;
+        write_cr4(v->arch.cr4);
+    }
+
     /* Maybe switch the debug registers. */
     cond_loaddebug(v, 0);
     cond_loaddebug(v, 1);
Index: 2007-10-10/xen/arch/x86/flushtlb.c
===================================================================
--- 2007-10-10.orig/xen/arch/x86/flushtlb.c     2007-10-29 11:19:58.000000000 +0100
+++ 2007-10-10/xen/arch/x86/flushtlb.c  2007-10-29 12:03:23.000000000 +0100
@@ -23,6 +23,19 @@
 u32 tlbflush_clock = 1U;
 DEFINE_PER_CPU(u32, tlbflush_time);
 
+static inline void __pge_off(void)
+{
+    write_cr4(mmu_cr4_features & ~X86_CR4_PGE);
+}
+
+static inline void __pge_on(void)
+{
+    struct vcpu *idle = idle_vcpu[smp_processor_id()];
+
+    write_cr4(likely(idle != NULL) && likely(idle != INVALID_VCPU) &&
+              likely(idle->arch.cr4) ? idle->arch.cr4 : mmu_cr4_features);
+}
+
 /*
  * pre_flush(): Increment the virtual TLB-flush clock. Returns new clock value.
  * 
Index: 2007-10-10/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- 2007-10-10.orig/xen/arch/x86/hvm/vmx/vmcs.c 2007-10-29 11:19:58.000000000 +0100
+++ 2007-10-10/xen/arch/x86/hvm/vmx/vmcs.c      2007-10-26 14:35:40.000000000 +0200
@@ -498,7 +498,7 @@ static int construct_vmcs(struct vcpu *v
 
     /* Host control registers. */
     __vmwrite(HOST_CR0, read_cr0() | X86_CR0_TS);
-    __vmwrite(HOST_CR4, read_cr4());
+    __vmwrite(HOST_CR4, mmu_cr4_features);
 
     /* Host CS:RIP. */
     __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
Index: 2007-10-10/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- 2007-10-10.orig/xen/arch/x86/hvm/vmx/vmx.c  2007-10-29 09:01:31.000000000 +0100
+++ 2007-10-10/xen/arch/x86/hvm/vmx/vmx.c       2007-10-29 09:10:52.000000000 +0100
@@ -728,6 +728,7 @@ static void vmx_ctxt_switch_from(struct 
 
 static void vmx_ctxt_switch_to(struct vcpu *v)
 {
+    idle_vcpu[v->processor]->arch.cr4 = mmu_cr4_features;
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
 }
Index: 2007-10-10/xen/arch/x86/setup.c
===================================================================
--- 2007-10-10.orig/xen/arch/x86/setup.c        2007-10-29 11:19:58.000000000 +0100
+++ 2007-10-10/xen/arch/x86/setup.c     2007-10-29 12:00:01.000000000 +0100
@@ -412,7 +412,7 @@ void __init __start_xen(unsigned long mb
 
     parse_video_info();
 
-    set_current((struct vcpu *)0xfffff000); /* debug sanity */
+    set_current(INVALID_VCPU); /* debug sanity */
     idle_vcpu[0] = current;
     set_processor_id(0); /* needed early, for smp_processor_id() */
 
Index: 2007-10-10/xen/arch/x86/traps.c
===================================================================
--- 2007-10-10.orig/xen/arch/x86/traps.c        2007-10-26 15:53:13.000000000 +0200
+++ 2007-10-10/xen/arch/x86/traps.c     2007-10-26 16:55:03.000000000 +0200
@@ -1717,10 +1717,24 @@ static int emulate_privileged_op(struct 
             break;
 
         case 4: /* Write CR4 */
-            if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
-                gdprintk(XENLOG_WARNING,
-                         "Attempt to change CR4 flags %08lx -> %08lx\n",
-                         read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE), *reg);
+            res = read_cr4();
+            if ( *reg != (res & ~(X86_CR4_PGE|X86_CR4_PSE)) )
+            {
+                unsigned long hv_cr4_mask = ~X86_CR4_TSD;
+
+                if ( cpu_has_de && IS_PRIV(v->domain) )
+                    hv_cr4_mask &= ~X86_CR4_DE;
+                if ( (*reg & hv_cr4_mask) !=
+                     (res & hv_cr4_mask & ~(X86_CR4_PGE|X86_CR4_PSE)) )
+                    gdprintk(XENLOG_WARNING,
+                             "Attempt to change CR4 flags %08lx -> %08lx\n",
+                             res & ~(X86_CR4_PGE|X86_CR4_PSE), *reg);
+                res &= hv_cr4_mask;
+                res |= *reg & ~hv_cr4_mask;
+                v->arch.cr4 = res;
+                idle_vcpu[v->processor]->arch.cr4 = v->arch.cr4;
+                write_cr4(res);
+            }
             break;
 
         default:
@@ -1785,6 +1799,10 @@ static int emulate_privileged_op(struct 
         }
         break;
 
+    case 0x31: /* RDTSC */
+        rdtsc(regs->eax, regs->edx);
+        break;
+
     case 0x32: /* RDMSR */
         switch ( regs->ecx )
         {
Index: 2007-10-10/xen/include/asm-x86/domain.h
===================================================================
--- 2007-10-10.orig/xen/include/asm-x86/domain.h        2007-10-29 11:19:58.000000000 +0100
+++ 2007-10-10/xen/include/asm-x86/domain.h     2007-10-26 17:14:51.000000000 +0200
@@ -18,6 +18,8 @@
 #define is_pv_32on64_vcpu(v)   (is_pv_32on64_domain((v)->domain))
 #define IS_COMPAT(d)           (is_pv_32on64_domain(d))
 
+#define INVALID_VCPU ((struct vcpu *)0xfffff000)
+
 struct trap_bounce {
     uint32_t      error_code;
     uint8_t       flags; /* TBF_ */
@@ -321,6 +323,7 @@ struct arch_vcpu
     pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
     pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
     unsigned long cr3;                  /* (MA) value to install in HW CR3 */
+    unsigned long cr4;                  /* guest-customized CR4 value */
 
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
Index: 2007-10-10/xen/include/asm-x86/page.h
===================================================================
--- 2007-10-10.orig/xen/include/asm-x86/page.h  2007-10-29 11:19:58.000000000 +0100
+++ 2007-10-10/xen/include/asm-x86/page.h       2007-10-26 14:21:48.000000000 +0200
@@ -294,9 +294,6 @@ void paging_init(void);
 void setup_idle_pagetable(void);
 #endif /* !defined(__ASSEMBLY__) */
 
-#define __pge_off() write_cr4(mmu_cr4_features & ~X86_CR4_PGE)
-#define __pge_on()  write_cr4(mmu_cr4_features)
-
 #define _PAGE_PRESENT  0x001U
 #define _PAGE_RW       0x002U
 #define _PAGE_USER     0x004U
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel