WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Improve multi-processor XenLinux fork/exec/destroy times

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] Improve multi-processor XenLinux fork/exec/destroy times. We do this
From: BitKeeper Bot <riel@xxxxxxxxxxx>
Date: Wed, 27 Apr 2005 14:42:32 +0000
Delivery-date: Wed, 27 Apr 2005 15:02:00 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: Xen Development List <xen-devel@xxxxxxxxxxxxxxxxxxx>
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
ChangeSet 1.1391, 2005/04/27 15:42:32+01:00, kaf24@xxxxxxxxxxxxxxxxxxxx

        Improve multi-processor XenLinux fork/exec/destroy times. We do this
        by lazily pinning page-tables for p.t. use, and aggressively unpinning
        them on last use, to put as little pressure on the batched wrpt
        interface as possible. Basically this means that the copy loop and
        destroy loop will usually be able to directly write pagetables with no
        Xen intervention at all (implicit or explicit).
        Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>



 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c             |    2 
 linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c                |    9 
 linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c             |  185 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h         |   22 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h |   26 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h     |   21 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h     |   12 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h    |    9 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h  |    9 
 linux-2.6.11-xen-sparse/mm/mmap.c                              | 2108 ++++++++++
 xen/arch/x86/mm.c                                              |   21 
 11 files changed, 2295 insertions(+), 129 deletions(-)


diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c 
b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c        2005-04-27 
11:02:21 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c        2005-04-27 
11:02:21 -04:00
@@ -100,8 +100,8 @@
        struct mm_struct * old_mm;
        int retval = 0;
 
+       memset(&mm->context, 0, sizeof(mm->context));
        init_MUTEX(&mm->context.sem);
-       mm->context.size = 0;
        old_mm = current->mm;
        if (old_mm && old_mm->context.size > 0) {
                down(&old_mm->context.sem);
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c 
b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c   2005-04-27 11:02:21 
-04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c   2005-04-27 11:02:21 
-04:00
@@ -710,18 +710,9 @@
 
 kmem_cache_t *pgd_cache;
 kmem_cache_t *pmd_cache;
-kmem_cache_t *pte_cache;
 
 void __init pgtable_cache_init(void)
 {
-       pte_cache = kmem_cache_create("pte",
-                               PTRS_PER_PTE*sizeof(pte_t),
-                               PTRS_PER_PTE*sizeof(pte_t),
-                               0,
-                               pte_ctor,
-                               pte_dtor);
-       if (!pte_cache)
-               panic("pgtable_cache_init(): Cannot create pte cache");
        if (PTRS_PER_PMD > 1) {
                pmd_cache = kmem_cache_create("pmd",
                                        PTRS_PER_PMD*sizeof(pmd_t),
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c 
b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c        2005-04-27 
11:02:21 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c        2005-04-27 
11:02:21 -04:00
@@ -198,59 +198,35 @@
        return pte;
 }
 
-void pte_ctor(void *pte, kmem_cache_t *cache, unsigned long unused)
-{
-       struct page *page = virt_to_page(pte);
-       SetPageForeign(page, pte_free);
-       set_page_count(page, 1);
-
-       clear_page(pte);
-       make_page_readonly(pte);
-       xen_pte_pin(__pa(pte));
-}
-
-void pte_dtor(void *pte, kmem_cache_t *cache, unsigned long unused)
-{
-       struct page *page = virt_to_page(pte);
-       ClearPageForeign(page);
-
-       xen_pte_unpin(__pa(pte));
-       make_page_writable(pte);
-}
-
 struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-       pte_t *ptep;
-
-#ifdef CONFIG_HIGHPTE
        struct page *pte;
 
+#ifdef CONFIG_HIGHPTE
        pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
-       if (pte == NULL)
-               return pte;
-       if (PageHighMem(pte))
-               return pte;
-       /* not a highmem page -- free page and grab one from the cache */
-       __free_page(pte);
+#else
+       pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+       if (pte) {
+               SetPageForeign(pte, pte_free);
+               set_page_count(pte, 1);
+       }
 #endif
-       ptep = kmem_cache_alloc(pte_cache, GFP_KERNEL);
-       if (ptep)
-               return virt_to_page(ptep);
-       return NULL;
+
+       return pte;
 }
 
 void pte_free(struct page *pte)
 {
+       unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
+
+       if (!pte_write(*virt_to_ptep(va)))
+               HYPERVISOR_update_va_mapping(
+                       va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0);
+
+       ClearPageForeign(pte);
        set_page_count(pte, 1);
-#ifdef CONFIG_HIGHPTE
-       if (!PageHighMem(pte))
-#endif
-               kmem_cache_free(pte_cache,
-                               phys_to_virt(page_to_pseudophys(pte)));
-#ifdef CONFIG_HIGHPTE
-       else
-               __free_page(pte);
-#endif
+
+       __free_page(pte);
 }
 
 void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
@@ -305,14 +281,11 @@
                        (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 
        if (PTRS_PER_PMD > 1)
-               goto out;
+               return;
 
        pgd_list_add(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
        memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- out:
-       make_page_readonly(pgd);
-       xen_pgd_pin(__pa(pgd));
 }
 
 /* never called when PTRS_PER_PMD > 1 */
@@ -320,9 +293,6 @@
 {
        unsigned long flags; /* can be called from interrupt context */
 
-       xen_pgd_unpin(__pa(pgd));
-       make_page_writable(pgd);
-
        if (PTRS_PER_PMD > 1)
                return;
 
@@ -357,6 +327,15 @@
 void pgd_free(pgd_t *pgd)
 {
        int i;
+       pte_t *ptep = virt_to_ptep(pgd);
+
+       if (!pte_write(*ptep)) {
+               xen_pgd_unpin(__pa(pgd));
+               HYPERVISOR_update_va_mapping(
+                       (unsigned long)pgd,
+                       pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
+                       0);
+       }
 
        /* in the PAE case user pgd entries are overwritten before usage */
        if (PTRS_PER_PMD > 1)
@@ -369,28 +348,19 @@
 #ifndef CONFIG_XEN_SHADOW_MODE
 void make_lowmem_page_readonly(void *va)
 {
-       pgd_t *pgd = pgd_offset_k((unsigned long)va);
-       pud_t *pud = pud_offset(pgd, (unsigned long)va);
-       pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
-       pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+       pte_t *pte = virt_to_ptep(va);
        set_pte(pte, pte_wrprotect(*pte));
 }
 
 void make_lowmem_page_writable(void *va)
 {
-       pgd_t *pgd = pgd_offset_k((unsigned long)va);
-       pud_t *pud = pud_offset(pgd, (unsigned long)va);
-       pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
-       pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+       pte_t *pte = virt_to_ptep(va);
        set_pte(pte, pte_mkwrite(*pte));
 }
 
 void make_page_readonly(void *va)
 {
-       pgd_t *pgd = pgd_offset_k((unsigned long)va);
-       pud_t *pud = pud_offset(pgd, (unsigned long)va);
-       pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
-       pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+       pte_t *pte = virt_to_ptep(va);
        set_pte(pte, pte_wrprotect(*pte));
        if ( (unsigned long)va >= (unsigned long)high_memory )
        {
@@ -405,10 +375,7 @@
 
 void make_page_writable(void *va)
 {
-       pgd_t *pgd = pgd_offset_k((unsigned long)va);
-       pud_t *pud = pud_offset(pgd, (unsigned long)va);
-       pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
-       pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+       pte_t *pte = virt_to_ptep(va);
        set_pte(pte, pte_mkwrite(*pte));
        if ( (unsigned long)va >= (unsigned long)high_memory )
        {
@@ -439,3 +406,91 @@
        }
 }
 #endif /* CONFIG_XEN_SHADOW_MODE */
+
+void mm_pin(struct mm_struct *mm)
+{
+    pgd_t       *pgd;
+    struct page *page;
+    int          i;
+
+    spin_lock(&mm->page_table_lock);
+
+    for ( i = 0, pgd = mm->pgd; i < USER_PTRS_PER_PGD; i++, pgd++ )
+    {
+        if ( *(unsigned long *)pgd == 0 )
+            continue;
+        page = pmd_page(*(pmd_t *)pgd);
+        if ( !PageHighMem(page) )
+            HYPERVISOR_update_va_mapping(
+                (unsigned long)__va(page_to_pfn(page)<<PAGE_SHIFT),
+                pfn_pte(page_to_pfn(page), PAGE_KERNEL_RO), 0);
+    }
+
+    HYPERVISOR_update_va_mapping(
+        (unsigned long)mm->pgd,
+        pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO), 0);
+    xen_pgd_pin(__pa(mm->pgd));
+
+    mm->context.pinned = 1;
+
+    spin_unlock(&mm->page_table_lock);
+}
+
+void mm_unpin(struct mm_struct *mm)
+{
+    pgd_t       *pgd;
+    struct page *page;
+    int          i;
+
+    spin_lock(&mm->page_table_lock);
+
+    xen_pgd_unpin(__pa(mm->pgd));
+    HYPERVISOR_update_va_mapping(
+        (unsigned long)mm->pgd,
+        pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0);
+
+    for ( i = 0, pgd = mm->pgd; i < USER_PTRS_PER_PGD; i++, pgd++ )

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

[Prev in Thread] Current Thread [Next in Thread]
  • [Xen-changelog] Improve multi-processor XenLinux fork/exec/destroy times. We do this, BitKeeper Bot <=