# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID 6374af16a8a3c27d107fe9145f28bf08020fda28
# Parent 9061e1246906e8d1b7f6519c5252e6182f73214d
[XEN][X86_64] Use GLOBAL bit to build user mappings.
Avoids need to flush user mappings when switching between
user and kernel contexts.
Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
xen/arch/x86/domain_build.c | 9 +--
xen/arch/x86/flushtlb.c | 101 ++++++++++++++++++++++++++------------
xen/arch/x86/mm.c | 26 +++++++++
xen/arch/x86/x86_64/traps.c | 6 ++
xen/include/asm-x86/flushtlb.h | 7 --
xen/include/asm-x86/x86_64/page.h | 15 +++++
6 files changed, 122 insertions(+), 42 deletions(-)
diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/arch/x86/domain_build.c Tue Sep 19 10:50:10 2006 +0100
@@ -74,10 +74,11 @@ string_param("dom0_ioports_disable", opt
#define L3_PROT (_PAGE_PRESENT)
#elif defined(__x86_64__)
/* Allow ring-3 access in long mode as guest cannot use ring 1. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define BASE_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L1_PROT (BASE_PROT|_PAGE_GUEST_KERNEL)
+#define L2_PROT (BASE_PROT|_PAGE_DIRTY)
+#define L3_PROT (BASE_PROT|_PAGE_DIRTY)
+#define L4_PROT (BASE_PROT|_PAGE_DIRTY)
#endif
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/flushtlb.c
--- a/xen/arch/x86/flushtlb.c Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/arch/x86/flushtlb.c Tue Sep 19 10:50:10 2006 +0100
@@ -4,13 +4,14 @@
* TLB flushes are timestamped using a global virtual 'clock' which ticks
* on any TLB flush on any processor.
*
- * Copyright (c) 2003-2004, K A Fraser
+ * Copyright (c) 2003-2006, K A Fraser
*/
#include <xen/config.h>
#include <xen/sched.h>
#include <xen/softirq.h>
#include <asm/flushtlb.h>
+#include <asm/page.h>
/* Debug builds: Wrap frequently to stress-test the wrap logic. */
#ifdef NDEBUG
@@ -22,21 +23,17 @@ u32 tlbflush_clock = 1U;
u32 tlbflush_clock = 1U;
DEFINE_PER_CPU(u32, tlbflush_time);
-void write_cr3(unsigned long cr3)
+/*
+ * pre_flush(): Increment the virtual TLB-flush clock. Returns new clock value.
+ *
+ * This must happen *before* we flush the TLB. If we do it after, we race other
+ * CPUs invalidating PTEs. For example, a page invalidated after the flush
+ * might get the old timestamp, but this CPU can speculatively fetch the
+ * mapping into its TLB after the flush but before inc'ing the clock.
+ */
+static u32 pre_flush(void)
{
u32 t, t1, t2;
- unsigned long flags;
-
- /* This non-reentrant function is sometimes called in interrupt context. */
- local_irq_save(flags);
-
- /*
- * STEP 1. Increment the virtual clock *before* flushing the TLB.
- * If we do it after, we race other CPUs invalidating PTEs.
- * (e.g., a page invalidated after the flush might get the old
- * timestamp, but this CPU can speculatively fetch the mapping
- * into its TLB after the flush but before inc'ing the clock).
- */
t = tlbflush_clock;
do {
@@ -52,26 +49,68 @@ void write_cr3(unsigned long cr3)
if ( unlikely(t2 == 0) )
raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
- /*
- * STEP 2. Update %CR3, thereby flushing the TLB.
- */
+ skip_clocktick:
+ return t2;
+}
- skip_clocktick:
+/*
+ * post_flush(): Update this CPU's timestamp with specified clock value.
+ *
+ * Note that this happens *after* flushing the TLB, as otherwise we can race a
+ * NEED_FLUSH() test on another CPU. (e.g., other CPU sees the updated CPU
+ * stamp and so does not force a synchronous TLB flush, but the flush in this
+ * function hasn't yet occurred and so the TLB might be stale). The ordering
+ * would only actually matter if this function were interruptible, and
+ * something that abuses the stale mapping could exist in an interrupt
+ * handler. In fact neither of these is the case, so really we are being ultra
+ * paranoid.
+ */
+static void post_flush(u32 t)
+{
+ this_cpu(tlbflush_time) = t;
+}
+
+void write_cr3(unsigned long cr3)
+{
+ unsigned long flags;
+ u32 t;
+
+ /* This non-reentrant function is sometimes called in interrupt context. */
+ local_irq_save(flags);
+
+ t = pre_flush();
+
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+ __pge_off();
__asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+ __pge_on();
+#else
+ __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+#endif
- /*
- * STEP 3. Update this CPU's timestamp. Note that this happens *after*
- * flushing the TLB, as otherwise we can race a NEED_FLUSH() test
- * on another CPU. (e.g., other CPU sees the updated CPU stamp and
- * so does not force a synchronous TLB flush, but the flush in this
- * function hasn't yet occurred and so the TLB might be stale).
- * The ordering would only actually matter if this function were
- * interruptible, and something that abuses the stale mapping could
- * exist in an interrupt handler. In fact neither of these is the
- * case, so really we are being ultra paranoid.
- */
-
- this_cpu(tlbflush_time) = t2;
+ post_flush(t);
local_irq_restore(flags);
}
+
+void local_flush_tlb(void)
+{
+ unsigned long flags;
+ u32 t;
+
+ /* This non-reentrant function is sometimes called in interrupt context. */
+ local_irq_save(flags);
+
+ t = pre_flush();
+
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+ __pge_off();
+ __pge_on();
+#else
+ __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
+#endif
+
+ post_flush(t);
+
+ local_irq_restore(flags);
+}
diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/arch/x86/mm.c Tue Sep 19 10:50:10 2006 +0100
@@ -694,11 +694,30 @@ get_page_from_l4e(
#endif /* 4 level */
#ifdef __x86_64__
+
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+#define adjust_guest_l1e(pl1e) \
+ do { \
+ if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \
+ { \
+ /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */ \
+ if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
+ == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) ) \
+ MEM_LOG("Global bit is set to kernel page %lx", \
+ l1e_get_pfn((pl1e))); \
+ if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) ) \
+ l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER)); \
+ if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) ) \
+ l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER)); \
+ } \
+ } while ( 0 )
+#else
#define adjust_guest_l1e(pl1e) \
- do { \
+ do { \
if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \
l1e_add_flags((pl1e), _PAGE_USER); \
} while ( 0 )
+#endif
#define adjust_guest_l2e(pl2e) \
do { \
@@ -717,10 +736,13 @@ get_page_from_l4e(
if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) ) \
l4e_add_flags((pl4e), _PAGE_USER); \
} while ( 0 )
-#else
+
+#else /* !defined(__x86_64__) */
+
#define adjust_guest_l1e(_p) ((void)0)
#define adjust_guest_l2e(_p) ((void)0)
#define adjust_guest_l3e(_p) ((void)0)
+
#endif
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/arch/x86/x86_64/traps.c Tue Sep 19 10:50:10 2006 +0100
@@ -15,6 +15,7 @@
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/msr.h>
+#include <asm/page.h>
#include <asm/shadow.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
@@ -188,7 +189,12 @@ void toggle_guest_mode(struct vcpu *v)
v->arch.flags ^= TF_kernel_mode;
__asm__ __volatile__ ( "swapgs" );
update_cr3(v);
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+ /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
+ __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+#else
write_ptbase(v);
+#endif
}
unsigned long do_iret(void)
diff -r 9061e1246906 -r 6374af16a8a3 xen/include/asm-x86/flushtlb.h
--- a/xen/include/asm-x86/flushtlb.h Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/include/asm-x86/flushtlb.h Tue Sep 19 10:50:10 2006 +0100
@@ -71,11 +71,8 @@ static inline unsigned long read_cr3(voi
/* Write pagetable base and implicitly tick the tlbflush clock. */
extern void write_cr3(unsigned long cr3);
-#define local_flush_tlb() \
- do { \
- unsigned long cr3 = read_cr3(); \
- write_cr3(cr3); \
- } while ( 0 )
+/* Flush guest mappings from the TLB and implicitly tick the tlbflush clock. */
+extern void local_flush_tlb(void);
#define local_flush_tlb_pge() \
do { \
diff -r 9061e1246906 -r 6374af16a8a3 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/include/asm-x86/x86_64/page.h Tue Sep 19 10:50:10 2006 +0100
@@ -93,6 +93,21 @@ typedef l4_pgentry_t root_pgentry_t;
#define GRANT_PTE_FLAGS \
(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB|_PAGE_USER)
+#define USER_MAPPINGS_ARE_GLOBAL
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+/*
+ * Bit 12 of a 24-bit flag mask. This corresponds to bit 52 of a pte.
+ * This is needed to distinguish between user and kernel PTEs since _PAGE_USER
+ * is asserted for both.
+ */
+#define _PAGE_GUEST_KERNEL (1U<<12)
+/* Global bit is allowed to be set on L1 PTEs. Intended for user mappings. */
+#undef L1_DISALLOW_MASK
+#define L1_DISALLOW_MASK ((BASE_DISALLOW_MASK | _PAGE_GNTTAB) & ~_PAGE_GLOBAL)
+#else
+#define _PAGE_GUEST_KERNEL 0
+#endif
+
#endif /* __X86_64_PAGE_H__ */
/*
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|