WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [linux-2.6.18-xen] linux: allow use of split page table locks

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [linux-2.6.18-xen] linux: allow use of split page table locks
From: "Xen patchbot-linux-2.6.18-xen" <patchbot-linux-2.6.18-xen@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 05 Oct 2007 13:40:09 -0700
Delivery-date: Fri, 05 Oct 2007 13:40:41 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1191577746 -3600
# Node ID 6e26ffc60647bd7454d0a066a8ab63ef7f0123af
# Parent  ac1f33f633ba158a5427f24dbc31a1ee573a02b7
linux: allow use of split page table locks

This fixes the race condition previously experienced between
(un)pinning and vmscan.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 arch/i386/mm/pgtable-xen.c    |   66 +++++++++++++++++++++++++++++++++++++++---
 arch/x86_64/mm/pageattr-xen.c |   66 +++++++++++++++++++++++++++++++++++++++---
 mm/Kconfig                    |    3 -
 3 files changed, 124 insertions(+), 11 deletions(-)

diff -r ac1f33f633ba -r 6e26ffc60647 arch/i386/mm/pgtable-xen.c
--- a/arch/i386/mm/pgtable-xen.c        Wed Oct 03 15:02:54 2007 +0100
+++ b/arch/i386/mm/pgtable-xen.c        Fri Oct 05 10:49:06 2007 +0100
@@ -494,6 +494,64 @@ void make_pages_writable(void *va, unsig
        }
 }
 
+static void _pin_lock(struct mm_struct *mm, int lock) {
+       if (lock)
+               spin_lock(&mm->page_table_lock);
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+       /* While mm->page_table_lock protects us against insertions and
+        * removals of higher level page table pages, it doesn't protect
+        * against updates of pte-s. Such updates, however, require the
+        * pte pages to be in consistent state (unpinned+writable or
+        * pinned+readonly). The pinning and attribute changes, however
+        * cannot be done atomically, which is why such updates must be
+        * prevented from happening concurrently.
+        * Note that no pte lock can ever elsewhere be acquired nesting
+        * with an already acquired one in the same mm, or with the mm's
+        * page_table_lock already acquired, as that would break in the
+        * non-split case (where all these are actually resolving to the
+        * one page_table_lock). Thus acquiring all of them here is not
+        * going to result in dead locks, and the order of acquires
+        * doesn't matter.
+        */
+       {
+               pgd_t *pgd = mm->pgd;
+               unsigned g;
+
+               for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
+                       pud_t *pud;
+                       unsigned u;
+
+                       if (pgd_none(*pgd))
+                               continue;
+                       pud = pud_offset(pgd, 0);
+                       for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+                               pmd_t *pmd;
+                               unsigned m;
+
+                               if (pud_none(*pud))
+                                       continue;
+                               pmd = pmd_offset(pud, 0);
+                               for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+                                       spinlock_t *ptl;
+
+                                       if (pmd_none(*pmd))
+                                               continue;
+                                       ptl = pte_lockptr(0, pmd);
+                                       if (lock)
+                                               spin_lock(ptl);
+                                       else
+                                               spin_unlock(ptl);
+                               }
+                       }
+               }
+       }
+#endif
+       if (!lock)
+               spin_unlock(&mm->page_table_lock);
+}
+#define pin_lock(mm) _pin_lock(mm, 1)
+#define pin_unlock(mm) _pin_lock(mm, 0)
+
 static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
 {
        unsigned long pfn = page_to_pfn(page);
@@ -576,18 +634,18 @@ void mm_pin(struct mm_struct *mm)
 {
        if (xen_feature(XENFEAT_writable_page_tables))
                return;
-       spin_lock(&mm->page_table_lock);
+       pin_lock(mm);
        __pgd_pin(mm->pgd);
-       spin_unlock(&mm->page_table_lock);
+       pin_unlock(mm);
 }
 
 void mm_unpin(struct mm_struct *mm)
 {
        if (xen_feature(XENFEAT_writable_page_tables))
                return;
-       spin_lock(&mm->page_table_lock);
+       pin_lock(mm);
        __pgd_unpin(mm->pgd);
-       spin_unlock(&mm->page_table_lock);
+       pin_unlock(mm);
 }
 
 void mm_pin_all(void)
diff -r ac1f33f633ba -r 6e26ffc60647 arch/x86_64/mm/pageattr-xen.c
--- a/arch/x86_64/mm/pageattr-xen.c     Wed Oct 03 15:02:54 2007 +0100
+++ b/arch/x86_64/mm/pageattr-xen.c     Fri Oct 05 10:49:06 2007 +0100
@@ -19,6 +19,64 @@
 
 LIST_HEAD(mm_unpinned);
 DEFINE_SPINLOCK(mm_unpinned_lock);
+
+static void _pin_lock(struct mm_struct *mm, int lock) {
+       if (lock)
+               spin_lock(&mm->page_table_lock);
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+       /* While mm->page_table_lock protects us against insertions and
+        * removals of higher level page table pages, it doesn't protect
+        * against updates of pte-s. Such updates, however, require the
+        * pte pages to be in consistent state (unpinned+writable or
+        * pinned+readonly). The pinning and attribute changes, however
+        * cannot be done atomically, which is why such updates must be
+        * prevented from happening concurrently.
+        * Note that no pte lock can ever elsewhere be acquired nesting
+        * with an already acquired one in the same mm, or with the mm's
+        * page_table_lock already acquired, as that would break in the
+        * non-split case (where all these are actually resolving to the
+        * one page_table_lock). Thus acquiring all of them here is not
+        * going to result in dead locks, and the order of acquires
+        * doesn't matter.
+        */
+       {
+               pgd_t *pgd = mm->pgd;
+               unsigned g;
+
+               for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
+                       pud_t *pud;
+                       unsigned u;
+
+                       if (pgd_none(*pgd))
+                               continue;
+                       pud = pud_offset(pgd, 0);
+                       for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+                               pmd_t *pmd;
+                               unsigned m;
+
+                               if (pud_none(*pud))
+                                       continue;
+                               pmd = pmd_offset(pud, 0);
+                               for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+                                       spinlock_t *ptl;
+
+                                       if (pmd_none(*pmd))
+                                               continue;
+                                       ptl = pte_lockptr(0, pmd);
+                                       if (lock)
+                                               spin_lock(ptl);
+                                       else
+                                               spin_unlock(ptl);
+                               }
+                       }
+               }
+       }
+#endif
+       if (!lock)
+               spin_unlock(&mm->page_table_lock);
+}
+#define pin_lock(mm) _pin_lock(mm, 1)
+#define pin_unlock(mm) _pin_lock(mm, 0)
 
 static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
 {
@@ -76,7 +134,7 @@ void mm_pin(struct mm_struct *mm)
        if (xen_feature(XENFEAT_writable_page_tables))
                return;
 
-       spin_lock(&mm->page_table_lock);
+       pin_lock(mm);
 
        mm_walk(mm, PAGE_KERNEL_RO);
        if (HYPERVISOR_update_va_mapping(
@@ -97,7 +155,7 @@ void mm_pin(struct mm_struct *mm)
        list_del(&mm->context.unpinned);
        spin_unlock(&mm_unpinned_lock);
 
-       spin_unlock(&mm->page_table_lock);
+       pin_unlock(mm);
 }
 
 void mm_unpin(struct mm_struct *mm)
@@ -105,7 +163,7 @@ void mm_unpin(struct mm_struct *mm)
        if (xen_feature(XENFEAT_writable_page_tables))
                return;
 
-       spin_lock(&mm->page_table_lock);
+       pin_lock(mm);
 
        xen_pgd_unpin(__pa(mm->pgd));
        xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
@@ -125,7 +183,7 @@ void mm_unpin(struct mm_struct *mm)
        list_add(&mm->context.unpinned, &mm_unpinned);
        spin_unlock(&mm_unpinned_lock);
 
-       spin_unlock(&mm->page_table_lock);
+       pin_unlock(mm);
 }
 
 void mm_pin_all(void)
diff -r ac1f33f633ba -r 6e26ffc60647 mm/Kconfig
--- a/mm/Kconfig        Wed Oct 03 15:02:54 2007 +0100
+++ b/mm/Kconfig        Fri Oct 05 10:49:06 2007 +0100
@@ -127,14 +127,11 @@ comment "Memory hotplug is currently inc
 # Default to 4 for wider testing, though 8 might be more appropriate.
 # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
 # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
-# XEN on x86 architecture uses the mapping field on pagetable pages to store a
-# pointer to the destructor. This conflicts with pte_lock_deinit().
 #
 config SPLIT_PTLOCK_CPUS
        int
        default "4096" if ARM && !CPU_CACHE_VIPT
        default "4096" if PARISC && !PA20
-       default "4096" if X86_XEN || X86_64_XEN
        default "4"
 
 #

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [linux-2.6.18-xen] linux: allow use of split page table locks, Xen patchbot-linux-2.6.18-xen <=