WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] [XEN] Make the spurious page-fault detect

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [XEN] Make the spurious page-fault detection logic
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 16 Jun 2006 20:25:21 +0000
Delivery-date: Fri, 16 Jun 2006 13:27:17 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxxxx
# Node ID e1ae7b3cb5b73f11bed3a51a7f4ded85c30cffd8
# Parent  05ab081f3c67cc4a4b3139090914ad9be5a0a100
[XEN] Make the spurious page-fault detection logic
more robust. In particular it must be able to handle
spurious write faults on mappings that have been
changed from read-only to writable. If a CPU has a stale
read-only entry in its TLB, it is allowed to fault on
the next write access without re-walking the page table.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/x86/traps.c            |  210 ++++++++++++++++++++++++++++------------
 xen/arch/x86/x86_32/traps.c     |   34 ------
 xen/arch/x86/x86_64/traps.c     |   34 ------
 xen/include/asm-x86/processor.h |    8 +
 4 files changed, 155 insertions(+), 131 deletions(-)

diff -r 05ab081f3c67 -r e1ae7b3cb5b7 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Jun 16 18:08:27 2006 +0100
+++ b/xen/arch/x86/traps.c      Fri Jun 16 18:18:55 2006 +0100
@@ -511,9 +511,9 @@ void propagate_page_fault(unsigned long 
     v->vcpu_info->arch.cr2           = addr;
 
     /* Re-set error_code.user flag appropriately for the guest. */
-    error_code &= ~4;
+    error_code &= ~PGERR_user_mode;
     if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
-        error_code |= 4;
+        error_code |= PGERR_user_mode;
 
     ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
     tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
@@ -578,54 +578,91 @@ static int handle_gdt_ldt_mapping_fault(
     (((va) >= HYPERVISOR_VIRT_START))
 #endif
 
-static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+static int __spurious_page_fault(
+    unsigned long addr, struct cpu_user_regs *regs)
+{
+    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
+#if CONFIG_PAGING_LEVELS >= 4
+    l4_pgentry_t l4e, *l4t;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    l3_pgentry_t l3e, *l3t;
+#endif
+    l2_pgentry_t l2e, *l2t;
+    l1_pgentry_t l1e, *l1t;
+    unsigned int required_flags, disallowed_flags;
+
+    required_flags  = _PAGE_PRESENT;
+    if ( regs->error_code & PGERR_write_access )
+        required_flags |= _PAGE_RW;
+    if ( regs->error_code & PGERR_user_mode )
+        required_flags |= _PAGE_USER;
+
+    disallowed_flags = 0;
+    if ( regs->error_code & PGERR_instr_fetch )
+        disallowed_flags |= _PAGE_NX;
+
+#if CONFIG_PAGING_LEVELS >= 4
+    l4t = map_domain_page(mfn);
+    l4e = l4t[l4_table_offset(addr)];
+    mfn = l4e_get_pfn(l4e);
+    unmap_domain_page(l4t);
+    if ( !(l4e_get_flags(l4e) & required_flags) ||
+         (l4e_get_flags(l4e) & disallowed_flags) )
+        return 0;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+    l3t = map_domain_page(mfn);
+    l3e = l3t[l3_table_offset(addr)];
+    mfn = l3e_get_pfn(l3e);
+    unmap_domain_page(l3t);
+#ifdef CONFIG_X86_PAE
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+        return 0;
+#else
+    if ( !(l3e_get_flags(l3e) & required_flags) ||
+         (l3e_get_flags(l3e) & disallowed_flags) )
+        return 0;
+#endif
+#endif
+
+    l2t = map_domain_page(mfn);
+    l2e = l2t[l2_table_offset(addr)];
+    mfn = l2e_get_pfn(l2e);
+    unmap_domain_page(l2t);
+    if ( !(l2e_get_flags(l2e) & required_flags) ||
+         (l2e_get_flags(l2e) & disallowed_flags) )
+        return 0;
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        return 1;
+
+    l1t = map_domain_page(mfn);
+    l1e = l1t[l1_table_offset(addr)];
+    mfn = l1e_get_pfn(l1e);
+    unmap_domain_page(l1t);
+    if ( !(l1e_get_flags(l1e) & required_flags) ||
+         (l1e_get_flags(l1e) & disallowed_flags) )
+        return 0;
+    return 1;
+}
+
+static int spurious_page_fault(
+    unsigned long addr, struct cpu_user_regs *regs)
 {
     struct vcpu   *v = current;
     struct domain *d = v->domain;
-
-    if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
-    {
-        if ( shadow_mode_external(d) && guest_mode(regs) )
-            return shadow_fault(addr, regs);
-        if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
-            return handle_gdt_ldt_mapping_fault(
-                addr - GDT_LDT_VIRT_START, regs);
-    }
-    else if ( unlikely(shadow_mode_enabled(d)) )
-    {
-        return shadow_fault(addr, regs);
-    }
-    else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
-    {
-        LOCK_BIGLOCK(d);
-        if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
-             unlikely(l2_linear_offset(addr) ==
-                      d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
-        {
-            ptwr_flush(d, PTWR_PT_ACTIVE);
-            UNLOCK_BIGLOCK(d);
-            return EXCRET_fault_fixed;
-        }
-
-        if ( guest_kernel_mode(v, regs) &&
-             /* Protection violation on write? No reserved-bit violation? */
-             ((regs->error_code & 0xb) == 0x3) &&
-             ptwr_do_page_fault(d, addr, regs) )
-        {
-            UNLOCK_BIGLOCK(d);
-            return EXCRET_fault_fixed;
-        }
-        UNLOCK_BIGLOCK(d);
-    }
-
-    return 0;
-}
-
-static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs)
-{
-    struct vcpu   *v = current;
-    struct domain *d = v->domain;
-    int            rc;
+    int            is_spurious;
+
+    /* Reserved bit violations are never spurious faults. */
+    if ( regs->error_code & PGERR_reserved_bit )
+        return 0;
+
+    LOCK_BIGLOCK(d);
+
+    is_spurious = __spurious_page_fault(addr, regs);
+    if ( is_spurious )
+        goto out;
 
     /*
      * The only possible reason for a spurious page fault not to be picked
@@ -635,10 +672,8 @@ static int spurious_page_fault(unsigned 
     if ( is_idle_domain(d) ||               /* no ptwr in idle domain       */
          IN_HYPERVISOR_RANGE(addr) ||       /* no ptwr on hypervisor addrs  */
          shadow_mode_enabled(d) ||          /* no ptwr logic in shadow mode */
-         ((regs->error_code & 0x1d) != 0) ) /* simple not-present fault?    */
-        return 0;
-
-    LOCK_BIGLOCK(d);
+         (regs->error_code & PGERR_page_present) ) /* not-present fault?    */
+        goto out;
 
     /*
      * The page directory could have been detached again while we weren't
@@ -649,16 +684,67 @@ static int spurious_page_fault(unsigned 
                   d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
     {
         ptwr_flush(d, PTWR_PT_ACTIVE);
-        rc = 1;
-    }
-    else
-    {
-        /* Okay, walk the page tables. Only check for not-present faults.*/
-        rc = __spurious_page_fault(addr);
-    }
-
+        is_spurious = 1;
+    }
+
+ out:
     UNLOCK_BIGLOCK(d);
-    return rc;
+    return is_spurious;
+}
+
+static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+{
+    struct vcpu   *v = current;
+    struct domain *d = v->domain;
+    int            rc;
+
+    if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
+    {
+        if ( shadow_mode_external(d) && guest_mode(regs) )
+            return shadow_fault(addr, regs);
+        if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
+            return handle_gdt_ldt_mapping_fault(
+                addr - GDT_LDT_VIRT_START, regs);
+        /*
+         * Do not propagate spurious faults in the hypervisor area to the
+         * guest. It cannot fix them up.
+         */
+        LOCK_BIGLOCK(d);
+        rc = __spurious_page_fault(addr, regs);
+        UNLOCK_BIGLOCK(d);
+        return rc;
+    }
+
+    if ( unlikely(shadow_mode_enabled(d)) )
+        return shadow_fault(addr, regs);
+
+    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
+    {
+        LOCK_BIGLOCK(d);
+        if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
+             unlikely(l2_linear_offset(addr) ==
+                      d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
+        {
+            ptwr_flush(d, PTWR_PT_ACTIVE);
+            UNLOCK_BIGLOCK(d);
+            return EXCRET_fault_fixed;
+        }
+
+        if ( guest_kernel_mode(v, regs) &&
+             /* Protection violation on write? No reserved-bit violation? */
+             ((regs->error_code & (PGERR_page_present |
+                                   PGERR_write_access |
+                                   PGERR_reserved_bit)) ==
+              (PGERR_page_present | PGERR_write_access)) &&
+             ptwr_do_page_fault(d, addr, regs) )
+        {
+            UNLOCK_BIGLOCK(d);
+            return EXCRET_fault_fixed;
+        }
+        UNLOCK_BIGLOCK(d);
+    }
+
+    return 0;
 }
 
 /*
@@ -784,8 +870,8 @@ static inline int admin_io_okay(
     (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
 
 /* Propagate a fault back to the guest kernel. */
-#define USER_READ_FAULT  4 /* user mode, read fault */
-#define USER_WRITE_FAULT 6 /* user mode, write fault */
+#define USER_READ_FAULT  (PGERR_user_mode)
+#define USER_WRITE_FAULT (PGERR_user_mode | PGERR_write_access)
 #define PAGE_FAULT(_faultaddr, _errcode)        \
 ({  propagate_page_fault(_faultaddr, _errcode); \
     return EXCRET_fault_fixed;                  \
diff -r 05ab081f3c67 -r e1ae7b3cb5b7 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Fri Jun 16 18:08:27 2006 +0100
+++ b/xen/arch/x86/x86_32/traps.c       Fri Jun 16 18:18:55 2006 +0100
@@ -113,40 +113,6 @@ void show_page_walk(unsigned long addr)
     unmap_domain_page(l1t);
 }
 
-int __spurious_page_fault(unsigned long addr)
-{
-    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
-#ifdef CONFIG_X86_PAE
-    l3_pgentry_t l3e, *l3t;
-#endif
-    l2_pgentry_t l2e, *l2t;
-    l1_pgentry_t l1e, *l1t;
-
-#ifdef CONFIG_X86_PAE
-    l3t = map_domain_page(mfn);
-    l3e = l3t[l3_table_offset(addr)];
-    mfn = l3e_get_pfn(l3e);
-    unmap_domain_page(l3t);
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        return 0;
-#endif
-
-    l2t = map_domain_page(mfn);
-    l2e = l2t[l2_table_offset(addr)];
-    mfn = l2e_get_pfn(l2e);
-    unmap_domain_page(l2t);
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-        return 0;
-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
-        return 1;
-
-    l1t = map_domain_page(mfn);
-    l1e = l1t[l1_table_offset(addr)];
-    mfn = l1e_get_pfn(l1e);
-    unmap_domain_page(l1t);
-    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
-}
-
 #define DOUBLEFAULT_STACK_SIZE 1024
 static struct tss_struct doublefault_tss;
 static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
diff -r 05ab081f3c67 -r e1ae7b3cb5b7 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Fri Jun 16 18:08:27 2006 +0100
+++ b/xen/arch/x86/x86_64/traps.c       Fri Jun 16 18:18:55 2006 +0100
@@ -115,40 +115,6 @@ void show_page_walk(unsigned long addr)
     printk("    L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
 }
 
-int __spurious_page_fault(unsigned long addr)
-{
-    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
-    l4_pgentry_t l4e, *l4t;
-    l3_pgentry_t l3e, *l3t;
-    l2_pgentry_t l2e, *l2t;
-    l1_pgentry_t l1e, *l1t;
-
-    l4t = mfn_to_virt(mfn);
-    l4e = l4t[l4_table_offset(addr)];
-    mfn = l4e_get_pfn(l4e);
-    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
-        return 0;
-
-    l3t = mfn_to_virt(mfn);
-    l3e = l3t[l3_table_offset(addr)];
-    mfn = l3e_get_pfn(l3e);
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        return 0;
-
-    l2t = mfn_to_virt(mfn);
-    l2e = l2t[l2_table_offset(addr)];
-    mfn = l2e_get_pfn(l2e);
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-        return 0;
-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
-        return 1;
-
-    l1t = mfn_to_virt(mfn);
-    l1e = l1t[l1_table_offset(addr)];
-    mfn = l1e_get_pfn(l1e);
-    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
-}
-
 asmlinkage void double_fault(void);
 asmlinkage void do_double_fault(struct cpu_user_regs *regs)
 {
diff -r 05ab081f3c67 -r e1ae7b3cb5b7 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Fri Jun 16 18:08:27 2006 +0100
+++ b/xen/include/asm-x86/processor.h   Fri Jun 16 18:18:55 2006 +0100
@@ -128,6 +128,13 @@
 /* 'arch_vcpu' flags values */
 #define _TF_kernel_mode        0
 #define TF_kernel_mode         (1<<_TF_kernel_mode)
+
+/* #PF error code values. */
+#define PGERR_page_present   (1U<<0)
+#define PGERR_write_access   (1U<<1)
+#define PGERR_user_mode      (1U<<2)
+#define PGERR_reserved_bit   (1U<<3)
+#define PGERR_instr_fetch    (1U<<4)
 
 #ifndef __ASSEMBLY__
 
@@ -524,7 +531,6 @@ void show_stack(struct cpu_user_regs *re
 void show_stack(struct cpu_user_regs *regs);
 void show_registers(struct cpu_user_regs *regs);
 void show_page_walk(unsigned long addr);
-int __spurious_page_fault(unsigned long addr);
 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
 
 extern void mtrr_ap_init(void);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

[<Prev in Thread] [Current Thread] [Next in Thread>]
  • [Xen-changelog] [xen-unstable] [XEN] Make the spurious page-fault detection logic, Xen patchbot-unstable <=